# Pommerman Demo.

This notebook demonstrates how to train Pommerman agents. Please let us know at support@pommerman.com if you run into any issues.

In [74]:
import os
import sys
import numpy as np
import time
import itertools
import random

import pommerman
from pommerman.agents import SimpleAgent, RandomAgent, PlayerAgent, BaseAgent, SmartRandomAgent, StaticAgent, SmartRandomAgentNoBomb
from pommerman.configs import ffa_v0_fast_env, lesson1_env, lesson2_env, lesson2b_env, lesson2c_env, lesson2d_env, lesson2e_env, lesson3_env, lesson3b_env, lesson3c_env, lesson3d_env, ffa_v4_fast_env
from pommerman.envs.v0 import Pomme as Pomme_v0
from pommerman.envs.v4 import Pomme as Pomme_v4
from pommerman.characters import Bomber
from pommerman import utility
from pommerman import agents
from pommerman import envs
from pommerman import constants
from pommerman import characters

# print all env configs
print(pommerman.REGISTRY)

['AdvancedLesson-v0', 'PommeFFACompetition-v0', 'PommeFFACompetitionFast-v0', 'PommeFFAFast-v0', 'PommeFFA-v1', 'PommeFFAFast-v3', 'PommeFFAFast-v4', 'Lesson1-v0', 'Lesson2-v0', 'Lesson2b-v0', 'Lesson2c-v0', 'Lesson2d-v0', 'Lesson2e-v0', 'Lesson3-v0', 'Lesson3b-v0', 'Lesson3c-v0', 'Lesson3d-v0', 'OneVsOne-v0', 'PommeRadioCompetition-v2', 'PommeRadio-v2', 'Simple-v0', 'SimpleRandomTeam-v0', 'SimpleTeam-v0', 'PommeTeamCompetition-v0', 'PommeTeamCompetitionFast-v0', 'PommeTeamCompetition-v1', 'PommeTeam-v0', 'PommeTeamFast-v0', 'PommeTeamSimple-v0']


# Train with stable baseline

In [2]:
import gym

from stable_baselines.common.policies import MlpPolicy, CnnLstmPolicy, MlpLstmPolicy
from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines import PPO2

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



## Inherit pommerman env and make it compatible with stable-baseline

In [87]:
class CustomPomme(Pomme_v0):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.obs_raw = None # store the raw version of observation
        self.training_idx = 1 # idx of the agent being trained
        self.time_step_reward = kwargs["time_step_reward"] # reward for each time step
        self.wood_reward = kwargs["wood_reward"] # reward for destroying each box
        
    
    # function to flatten pommerman observation
    def _transform_obs(self, obs_raw):
        obs_training = obs_raw[self.training_idx] # default the first agent to be trained

        # construct flattened observation
        obs = [
            *np.array(obs_training["board"]).reshape(-1),
            *np.array(obs_training["bomb_blast_strength"]).reshape(-1),
            *np.array(obs_training["bomb_life"]).reshape(-1),
            *np.array(obs_training["position"]).reshape(-1),
            obs_training["ammo"],
            obs_training["blast_strength"],
            obs_training["can_kick"],
            obs_training["teammate"].value,
            obs_training["enemies"][0].value,
            
            # uncommon if training 1 v 1
            obs_training["enemies"][0].value,
            obs_training["enemies"][0].value,
            
            # uncommon if training 2 v 2
#             obs_training["enemies"][1].value,
#             obs_training["enemies"][2].value,
        ]
        return obs
    
    # count the number of all elements in an np array
    def _ele_counter(self, a):
        unique, counts = np.unique(a, return_counts=True)
        return dict(zip(unique, counts))
    
    def get_obs_raw(self):
        return self.obs_raw

    def step(self, action_training):
        # obtain action of oponent agents
        action_nontraining = self.act(self.obs_raw)
        
        # combine actions
        actions = [*action_nontraining, action_training]
        
        # step
        obs_raw, reward, done, info = super().step(actions)
        reward_training = reward[self.training_idx]
        self.obs_raw = obs_raw

        #######################################################################################################
        ### tweak reward system for wooden box collecting problem, comment if training for original problem ###
        
        obs_training = obs_raw[self.training_idx]
#         display(obs_training)
        
        # count number of wood
        board = np.array(obs_training["board"])
        ele_count = self._ele_counter(board)
        curr_num_wood = 0
        if 2 in ele_count:
            curr_num_wood = ele_count[2] # number of wood in current time step, 2 represents wood
        
        # calculate reward
        time_step = obs_training["step_count"] # number of time step in a single game
        reward_training += (self._num_wood - curr_num_wood) * self.wood_reward + time_step * self.time_step_reward
        
        # if the game end with tie, gives -1 reward
        if "result" in info:
            if(info["result"].value == 2):
                reward_training += -1
            
#         print("reward_training = (self._num_wood - curr_num_wood) * self.wood_reward + time_step * self.time_step_reward: ")
#         print(reward_training, " = (", self._num_wood, " - ", curr_num_wood, ")", " * ", self.wood_reward, " + ", time_step, " * ", self.time_step_reward)
#         print()
        #######################################################################################################
        
        return self._transform_obs(obs_raw), reward_training, done, info
    
    def reset(self):
        obs_raw = super().reset()
        self.obs_raw = obs_raw
        return self._transform_obs(obs_raw)
    
    def render(self,
               mode=None,
               close=False,
               record_pngs_dir=None,
               record_json_dir=None,
               do_sleep=True):
        super().render(mode=mode,
                       close=close,
                       record_pngs_dir=record_pngs_dir,
                       record_json_dir=record_json_dir,
                       do_sleep=do_sleep)

## Custom CNN Policy

In [88]:
import tensorflow as tf

from stable_baselines.a2c.utils import linear
from stable_baselines.common.policies import ActorCriticPolicy

class CustomCNN(ActorCriticPolicy):
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
        super(CustomCNN, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse)
        size=11
        bp = 3*size**2 #board partition
        with tf.variable_scope("model", reuse=reuse):
            obs = self.processed_obs
            self.board1, self.misc = tf.split(obs, [bp, -1], 1)
            
            self.board = tf.reshape(self.board1, (-1, size, size, 3))
            self.conv1 = tf.layers.conv2d(self.board, 64, 2, activation=tf.nn.relu, name='conv1')
            self.conv2 = tf.layers.conv2d(self.conv1, 32, 2, activation=tf.nn.relu, name='conv2')
            self.fc0 = tf.contrib.layers.flatten(self.conv2)
            self.fc1 = tf.concat((self.fc0, self.misc), -1)
            self.fc1 = tf.layers.dense(self.fc1, 1024, name = 'fc1')
            self.actions = tf.layers.dense(self.fc1, 6)   
            self.valueUM = tf.layers.dense(self.fc1, 128) #??

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(self.actions, self.valueUM, init_scale=0.01)

        self._value_fn = linear(self.valueUM, 'vf', 1)
        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=False):
        if deterministic:
            action, value, neglogp = self.sess.run([self.deterministic_action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        else:
            action, value, neglogp = self.sess.run([self.action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        return action, value, self.initial_state, neglogp

    def proba_step(self, obs, state=None, mask=None):
        return self.sess.run(self.policy_proba, {self.obs_ph: obs})

    def value(self, obs, state=None, mask=None):
        return self.sess.run(self.value_flat, {self.obs_ph: obs})

## Demo envs

In [89]:
def team_v3_fast_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.Team
    env_entry_point = 'CustomPomme'
    env_id = 'PommeTeamFast-v3'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 8,
        'num_rigid': 0,
        'num_wood': 0,
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": 0.01, # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

def one_vs_one_v3_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'PommeOneVsOneFast-v3'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 8,
        'num_rigid': 0,
        'num_wood': 0,
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": 0.01, # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

def one_vs_one_v4_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'PommeOneVsOneFast-v4'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 8,
        'free_board_size': constants.FREE_BOARD_SIZE,
        'num_rigid': 0,
        'num_wood': 0,
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": 0.01, # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

## Wooden Box collecting curriculum training envs

In [90]:
env_num_woods = [8, 10, 12, 14, 16, 18] # number of woods for lesson 0, 1, 2, 3, 4, 5
# env_wood_reward = [pow(2, i) for i in range(-1, -7, -1)] # reward for destroying each box
env_wood_reward = [0.1/x for x in env_num_woods] # reward for destroying each box
board_size = 11

def wood_box_lesson0_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'WoodBoxLesson0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': board_size,
        'free_board_size': constants.FREE_BOARD_SIZE,
        'num_rigid': 0,
        'num_wood': env_num_woods[0],
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": env_wood_reward[0], # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

def wood_box_lesson1_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'WoodBoxLesson1'
    env_kwargs = {
        'game_type': game_type,
        'board_size': board_size,
        'free_board_size': constants.FREE_BOARD_SIZE,
        'num_rigid': 0,
        'num_wood': env_num_woods[1],
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": env_wood_reward[1], # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

def wood_box_lesson2_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'WoodBoxLesson2'
    env_kwargs = {
        'game_type': game_type,
        'board_size': board_size,
        'free_board_size': constants.FREE_BOARD_SIZE,
        'num_rigid': 0,
        'num_wood': env_num_woods[2],
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": env_wood_reward[2], # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

def wood_box_lesson3_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'WoodBoxLesson3'
    env_kwargs = {
        'game_type': game_type,
        'board_size': board_size,
        'free_board_size': constants.FREE_BOARD_SIZE,
        'num_rigid': 0,
        'num_wood': env_num_woods[3],
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": env_wood_reward[3], # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

def wood_box_lesson4_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'WoodBoxLesson4'
    env_kwargs = {
        'game_type': game_type,
        'board_size': board_size,
        'free_board_size': constants.FREE_BOARD_SIZE,
        'num_rigid': 0,
        'num_wood': env_num_woods[4],
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": env_wood_reward[4], # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

def wood_box_lesson5_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'WoodBoxLesson5'
    env_kwargs = {
        'game_type': game_type,
        'board_size': board_size,
        'free_board_size': constants.FREE_BOARD_SIZE,
        'num_rigid': 0,
        'num_wood': env_num_woods[5],
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
        "wood_reward": env_wood_reward[5], # added for box collecting env
        "time_step_reward": -0.001, # added for box collecting env
        "rand_agent_pos": True # randomize initial position of agent
    }
    agent = characters.Bomber
    return locals()

In [91]:
# log function during training, implement if needed
def log(local_var, global_var):
    pass
#     display(local_var)
#     display(global_var)

# number of cpu
n_cpu = 1

# env initialization

In [92]:
def initialize_env(config):
    env_pom = CustomPomme(**config["env_kwargs"])

    # config agents
    agents = []

    # Add opponent agents
    for agent_id in range(1):
        agents.append(StaticAgent(config["agent"](agent_id, config["game_type"])))

    # add player agent(to train)
    agents.append(PlayerAgent(config["agent"](1, config["game_type"])))

    env_pom.set_agents(agents)
    env_pom.set_training_agent(agents[1].agent_id)
    env_pom.set_init_game_state(None)

    # Seed and reset the environment
    env_pom.seed(0)
    
    # set up env vector
    env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])
    
    return env


def initialize_env_smart_random(config):
    env_pom = CustomPomme(**config["env_kwargs"])

    # config agents
    agents = []

    # Add opponent agents
    for agent_id in range(1):
        agents.append(SmartRandomAgentNoBomb(config["agent"](agent_id, config["game_type"])))

    # add player agent(to train)
    agents.append(PlayerAgent(config["agent"](1, config["game_type"])))

    env_pom.set_agents(agents)
    env_pom.set_training_agent(agents[1].agent_id)
    env_pom.set_init_game_state(None)

    # Seed and reset the environment
    env_pom.seed(0)
    
    # set up env vector
    env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])
    
    return env


def initialize_env_simple(config):
    env_pom = CustomPomme(**config["env_kwargs"])

    # config agents
    agents = []

    # Add opponent agents
    for agent_id in range(1):
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

    # add player agent(to train)
    agents.append(PlayerAgent(config["agent"](1, config["game_type"])))

    env_pom.set_agents(agents)
    env_pom.set_training_agent(agents[1].agent_id)
    env_pom.set_init_game_state(None)

    # Seed and reset the environment
    env_pom.seed(0)
    
    # set up env vector
    env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])
    
    return env

# Training loop

In [93]:
def train(model_name, model = None, env = None, n_steps=2400, total_timesteps=50000):
    if model == None:
        model = PPO2(#MlpPolicy,
                     CustomCNN,
                     #CnnLstmPolicy,
                     #MlpLstmPolicy,
                     env, 
                     verbose=1, 
                     n_steps = n_steps, # batch_size = n_step * num_env
                     ent_coef = 0.001, # entropy coefficient
                     nminibatches = 1,
                     tensorboard_log = "./ppo2_pommerman_box_collect_tensorboard_version4/")
    else:
        model.set_env(env)
    model = model.learn(total_timesteps = total_timesteps, # num_update = total_timesteps // batch_size
                        callback = log, tb_log_name = model_name)
    return model

# Testing loop

In [94]:
def test(env, model, render=False, total = 100):
    # test the learned model
    num_win = 0
    num_tie = 0
    num_lose = 0
    total = total # number of playouts
    for i_episode in range(total):
        obs = env.reset()
        done = False
        info = None
        while not done:
            if render:
                env.render()
                time.sleep(0.1)
            action_training, _states = model.predict(obs)
    #         print(action_training)
            obs, rewards, dones, infos = env.step(action_training)
    #         print(infos)
            done = dones[0]
            info = infos[0]
        print('Episode {} finished'.format(i_episode))
        if(info["result"].value == 0):
            if(1 in info["winners"]):
                num_win+=1
            else:
                num_lose+=1
        elif(info["result"].value == 2):
            num_tie+=1
    #     print(info)
    env.close()
    print("Win ", num_win, "/", total, " games")
    print("Tie ", num_tie, "/", total, " games")
    print("Lose ", num_lose, "/", total, " games")

# Training Lessons

## useful constants

In [95]:
total_timestep = 5000000
n_steps = 3200

## Training lesson 0.
### 11x11 grid with 2 wooden boxes

In [96]:
config_lesson0 = wood_box_lesson0_env()
env_lesson0 = initialize_env(config_lesson0)
model_names = ["wooden_box_collecting_lesson" + str(x) + "_agent_" + str(total_timestep) + "_" + str(n_steps) + "_" + "customCNN" for x in range(6)]

In [97]:
model_lesson0 = train(model_name = model_names[0], 
                      env = env_lesson0, 
                      n_steps = n_steps, 
                      total_timesteps=total_timestep)
# model_lesson0.save(model_names[0])

## Testing lesson0

In [56]:
# model_lesson0 = PPO2.load(model_names[0])
test(env_lesson0, model_lesson0, render = False)

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Episode 20 finished
Episode 21 finished
Episode 22 finished
Episode 23 finished
Episode 24 finished
Episode 25 finished
Episode 26 finished
Episode 27 finished
Episode 28 finished
Episode 29 finished
Episode 30 finished
Episode 31 finished
Episode 32 finished
Episode 33 finished
Episode 34 finished
Episode 35 finished
Episode 36 finished
Episode 37 finished
Episode 38 finished
Episode 39 finished
Episode 40 finished
Episode 41 finished
Episode 42 finished
Episode 43 finished
Episode 44 finished
Episode 45 finished
Episode 46 finished
Episode 47 finished
Episode 48 finished
Episode 49 finished
Episode 50

## Training lesson 1
### 11x11 grid with 4 wooden boxes

In [57]:
config_lesson1 = wood_box_lesson1_env()
env_lesson1 = initialize_env(config_lesson1)

In [58]:
# model_lesson0 = PPO2.load(load_path = model_names[0],
#                           tensorboard_log = "./ppo2_pommerman_box_collect_tensorboard/")
model_lesson1 = train(model_name = model_names[1], 
                      model = model_lesson0,
                      env = env_lesson1, 
                      n_steps = n_steps, 
                      total_timesteps = total_timestep)
# model_lesson1.save(model_names[1])

--------------------------------------
| approxkl           | 0.0004900428  |
| clipfrac           | 0.0061718747  |
| explained_variance | 0.215         |
| fps                | 1037          |
| n_updates          | 1             |
| policy_entropy     | 0.07649745    |
| policy_loss        | 0.00032511554 |
| serial_timesteps   | 3200          |
| time_elapsed       | 2.86e-06      |
| total_timesteps    | 3200          |
| value_loss         | 14.374535     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00034197408  |
| clipfrac           | 0.004296875    |
| explained_variance | 0.337          |
| fps                | 1007           |
| n_updates          | 2              |
| policy_entropy     | 0.10851507     |
| policy_loss        | -0.00029847547 |
| serial_timesteps   | 6400           |
| time_elapsed       | 3.09           |
| total_timesteps    | 6400           |
| value_loss         | 6.8523774      |
-------------

-------------------------------------
| approxkl           | 0.0048488276 |
| clipfrac           | 0.06304688   |
| explained_variance | 0.647        |
| fps                | 1025         |
| n_updates          | 18           |
| policy_entropy     | 0.24386092   |
| policy_loss        | 0.0035211197 |
| serial_timesteps   | 57600        |
| time_elapsed       | 52.9         |
| total_timesteps    | 57600        |
| value_loss         | 7.1207514    |
-------------------------------------
---------------------------------------
| approxkl           | 4.4805096e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.8            |
| fps                | 999            |
| n_updates          | 19             |
| policy_entropy     | 0.06684391     |
| policy_loss        | -0.00013610921 |
| serial_timesteps   | 60800          |
| time_elapsed       | 56             |
| total_timesteps    | 60800          |
| value_loss         | 8.7858305      |
--------------------------

---------------------------------------
| approxkl           | 4.5361216e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.765          |
| fps                | 1002           |
| n_updates          | 35             |
| policy_entropy     | 0.21924731     |
| policy_loss        | -2.4213641e-05 |
| serial_timesteps   | 112000         |
| time_elapsed       | 108            |
| total_timesteps    | 112000         |
| value_loss         | 9.220855       |
---------------------------------------
--------------------------------------
| approxkl           | 0.000544503   |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.789         |
| fps                | 992           |
| n_updates          | 36            |
| policy_entropy     | 0.16321927    |
| policy_loss        | 0.00013391607 |
| serial_timesteps   | 115200        |
| time_elapsed       | 111           |
| total_timesteps    | 115200        |
| value_loss         | 11.824411     |
------------

--------------------------------------
| approxkl           | 2.7710586e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.874         |
| fps                | 1017          |
| n_updates          | 51            |
| policy_entropy     | 0.052872054   |
| policy_loss        | 1.622919e-05  |
| serial_timesteps   | 163200        |
| time_elapsed       | 157           |
| total_timesteps    | 163200        |
| value_loss         | 7.7100234     |
--------------------------------------
--------------------------------------
| approxkl           | 3.6751167e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.888         |
| fps                | 1015          |
| n_updates          | 52            |
| policy_entropy     | 0.102970935   |
| policy_loss        | -7.694684e-05 |
| serial_timesteps   | 166400        |
| time_elapsed       | 160           |
| total_timesteps    | 166400        |
| value_loss         | 4.3985605     |
-------------------------

---------------------------------------
| approxkl           | 1.2602213e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.903          |
| fps                | 1049           |
| n_updates          | 68             |
| policy_entropy     | 0.04798369     |
| policy_loss        | -1.3240651e-05 |
| serial_timesteps   | 217600         |
| time_elapsed       | 210            |
| total_timesteps    | 217600         |
| value_loss         | 7.972863       |
---------------------------------------
--------------------------------------
| approxkl           | 7.5745935e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.875         |
| fps                | 997           |
| n_updates          | 69            |
| policy_entropy     | 0.08072549    |
| policy_loss        | 3.059745e-05  |
| serial_timesteps   | 220800        |
| time_elapsed       | 213           |
| total_timesteps    | 220800        |
| value_loss         | 8.532363      |
------------

---------------------------------------
| approxkl           | 1.3208762e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.923          |
| fps                | 1050           |
| n_updates          | 85             |
| policy_entropy     | 0.07884805     |
| policy_loss        | -0.00014825238 |
| serial_timesteps   | 272000         |
| time_elapsed       | 263            |
| total_timesteps    | 272000         |
| value_loss         | 4.1826015      |
---------------------------------------
--------------------------------------
| approxkl           | 1.193255e-05  |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.913         |
| fps                | 1074          |
| n_updates          | 86            |
| policy_entropy     | 0.06639105    |
| policy_loss        | -6.604217e-05 |
| serial_timesteps   | 275200        |
| time_elapsed       | 266           |
| total_timesteps    | 275200        |
| value_loss         | 7.494822      |
------------

--------------------------------------
| approxkl           | 8.498719e-05  |
| clipfrac           | 0.000859375   |
| explained_variance | 0.951         |
| fps                | 1006          |
| n_updates          | 102           |
| policy_entropy     | 0.06511556    |
| policy_loss        | 2.3148283e-05 |
| serial_timesteps   | 326400        |
| time_elapsed       | 316           |
| total_timesteps    | 326400        |
| value_loss         | 3.8718483     |
--------------------------------------
---------------------------------------
| approxkl           | 4.8187696e-05  |
| clipfrac           | 0.00046875     |
| explained_variance | 0.896          |
| fps                | 1027           |
| n_updates          | 103            |
| policy_entropy     | 0.055288374    |
| policy_loss        | -0.00015579052 |
| serial_timesteps   | 329600         |
| time_elapsed       | 319            |
| total_timesteps    | 329600         |
| value_loss         | 9.369625       |
-------------

-------------------------------------
| approxkl           | 2.921318e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.895        |
| fps                | 946          |
| n_updates          | 119          |
| policy_entropy     | 0.13855585   |
| policy_loss        | 6.181538e-05 |
| serial_timesteps   | 380800       |
| time_elapsed       | 371          |
| total_timesteps    | 380800       |
| value_loss         | 5.3679757    |
-------------------------------------
--------------------------------------
| approxkl           | 3.7763384e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.955         |
| fps                | 993           |
| n_updates          | 120           |
| policy_entropy     | 0.104919456   |
| policy_loss        | -9.948004e-05 |
| serial_timesteps   | 384000        |
| time_elapsed       | 374           |
| total_timesteps    | 384000        |
| value_loss         | 3.383824      |
--------------------------------------

--------------------------------------
| approxkl           | 0.00017359649 |
| clipfrac           | 0.000546875   |
| explained_variance | -2.42         |
| fps                | 968           |
| n_updates          | 136           |
| policy_entropy     | 0.13903728    |
| policy_loss        | 1.1804696e-06 |
| serial_timesteps   | 435200        |
| time_elapsed       | 427           |
| total_timesteps    | 435200        |
| value_loss         | 0.91286504    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0003144478  |
| clipfrac           | 0.0034375     |
| explained_variance | -3.25         |
| fps                | 1000          |
| n_updates          | 137           |
| policy_entropy     | 0.14826575    |
| policy_loss        | -0.0010272022 |
| serial_timesteps   | 438400        |
| time_elapsed       | 431           |
| total_timesteps    | 438400        |
| value_loss         | 0.59003866    |
-------------------------

-------------------------------------
| approxkl           | 8.079249e-06 |
| clipfrac           | 0.0          |
| explained_variance | 0.911        |
| fps                | 991          |
| n_updates          | 153          |
| policy_entropy     | 0.060636025  |
| policy_loss        | 5.242523e-05 |
| serial_timesteps   | 489600       |
| time_elapsed       | 480          |
| total_timesteps    | 489600       |
| value_loss         | 6.842199     |
-------------------------------------
---------------------------------------
| approxkl           | 3.0750387e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.886          |
| fps                | 985            |
| n_updates          | 154            |
| policy_entropy     | 0.16641156     |
| policy_loss        | -0.00013205013 |
| serial_timesteps   | 492800         |
| time_elapsed       | 484            |
| total_timesteps    | 492800         |
| value_loss         | 8.270615       |
--------------------------

--------------------------------------
| approxkl           | 0.000323029   |
| clipfrac           | 0.0040625     |
| explained_variance | -9.74e-05     |
| fps                | 908           |
| n_updates          | 170           |
| policy_entropy     | 0.16377547    |
| policy_loss        | -0.0009668315 |
| serial_timesteps   | 544000        |
| time_elapsed       | 536           |
| total_timesteps    | 544000        |
| value_loss         | 0.7979863     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00035678118  |
| clipfrac           | 0.0042187497   |
| explained_variance | 0.198          |
| fps                | 922            |
| n_updates          | 171            |
| policy_entropy     | 0.12570739     |
| policy_loss        | -0.00022235297 |
| serial_timesteps   | 547200         |
| time_elapsed       | 539            |
| total_timesteps    | 547200         |
| value_loss         | 1.4259216      |
-------------

---------------------------------------
| approxkl           | 1.0297148e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.933          |
| fps                | 1020           |
| n_updates          | 187            |
| policy_entropy     | 0.06548214     |
| policy_loss        | -4.3921176e-07 |
| serial_timesteps   | 598400         |
| time_elapsed       | 591            |
| total_timesteps    | 598400         |
| value_loss         | 6.479633       |
---------------------------------------
---------------------------------------
| approxkl           | 0.0004075623   |
| clipfrac           | 0.005078125    |
| explained_variance | -3.08          |
| fps                | 987            |
| n_updates          | 188            |
| policy_entropy     | 0.15857361     |
| policy_loss        | -0.00074769044 |
| serial_timesteps   | 601600         |
| time_elapsed       | 594            |
| total_timesteps    | 601600         |
| value_loss         | 0.2989471      |


---------------------------------------
| approxkl           | 0.00015960203  |
| clipfrac           | 0.0011718749   |
| explained_variance | 0.738          |
| fps                | 1046           |
| n_updates          | 204            |
| policy_entropy     | 0.21007907     |
| policy_loss        | -0.00013808266 |
| serial_timesteps   | 652800         |
| time_elapsed       | 645            |
| total_timesteps    | 652800         |
| value_loss         | 0.887295       |
---------------------------------------
---------------------------------------
| approxkl           | 0.00018992732  |
| clipfrac           | 0.0017968749   |
| explained_variance | 0.957          |
| fps                | 1015           |
| n_updates          | 205            |
| policy_entropy     | 0.1419476      |
| policy_loss        | -0.00071164756 |
| serial_timesteps   | 656000         |
| time_elapsed       | 648            |
| total_timesteps    | 656000         |
| value_loss         | 0.36791426     |


--------------------------------------
| approxkl           | 6.455343e-05  |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.949         |
| fps                | 1011          |
| n_updates          | 221           |
| policy_entropy     | 0.13100998    |
| policy_loss        | 7.7114404e-05 |
| serial_timesteps   | 707200        |
| time_elapsed       | 700           |
| total_timesteps    | 707200        |
| value_loss         | 3.1519258     |
--------------------------------------
--------------------------------------
| approxkl           | 2.8619332e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.894         |
| fps                | 996           |
| n_updates          | 222           |
| policy_entropy     | 0.16985369    |
| policy_loss        | -8.006998e-05 |
| serial_timesteps   | 710400        |
| time_elapsed       | 703           |
| total_timesteps    | 710400        |
| value_loss         | 0.976047      |
-------------------------

--------------------------------------
| approxkl           | 2.2993067e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.882         |
| fps                | 1107          |
| n_updates          | 237           |
| policy_entropy     | 0.07195217    |
| policy_loss        | 3.0919912e-06 |
| serial_timesteps   | 758400        |
| time_elapsed       | 750           |
| total_timesteps    | 758400        |
| value_loss         | 7.587238      |
--------------------------------------
---------------------------------------
| approxkl           | 2.0852316e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.875          |
| fps                | 1064           |
| n_updates          | 238            |
| policy_entropy     | 0.10399804     |
| policy_loss        | -8.2274826e-05 |
| serial_timesteps   | 761600         |
| time_elapsed       | 753            |
| total_timesteps    | 761600         |
| value_loss         | 5.22048        |
-------------

---------------------------------------
| approxkl           | 0.00042726667  |
| clipfrac           | 0.001171875    |
| explained_variance | 0.887          |
| fps                | 1016           |
| n_updates          | 254            |
| policy_entropy     | 0.284496       |
| policy_loss        | -0.00028841966 |
| serial_timesteps   | 812800         |
| time_elapsed       | 803            |
| total_timesteps    | 812800         |
| value_loss         | 4.2046022      |
---------------------------------------
--------------------------------------
| approxkl           | 5.46125e-05   |
| clipfrac           | 0.0           |
| explained_variance | 0.924         |
| fps                | 1027          |
| n_updates          | 255           |
| policy_entropy     | 0.12285714    |
| policy_loss        | -8.890644e-05 |
| serial_timesteps   | 816000        |
| time_elapsed       | 807           |
| total_timesteps    | 816000        |
| value_loss         | 3.5788536     |
------------

---------------------------------------
| approxkl           | 4.2937863e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.919          |
| fps                | 908            |
| n_updates          | 271            |
| policy_entropy     | 0.11645645     |
| policy_loss        | -0.00012442574 |
| serial_timesteps   | 867200         |
| time_elapsed       | 858            |
| total_timesteps    | 867200         |
| value_loss         | 3.8880153      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00015475872 |
| clipfrac           | 0.0           |
| explained_variance | 0.906         |
| fps                | 947           |
| n_updates          | 272           |
| policy_entropy     | 0.19172573    |
| policy_loss        | 0.0003742662  |
| serial_timesteps   | 870400        |
| time_elapsed       | 862           |
| total_timesteps    | 870400        |
| value_loss         | 4.1619296     |
------------

--------------------------------------
| approxkl           | 4.680046e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.929         |
| fps                | 1067          |
| n_updates          | 287           |
| policy_entropy     | 0.17157295    |
| policy_loss        | 4.1096508e-05 |
| serial_timesteps   | 918400        |
| time_elapsed       | 910           |
| total_timesteps    | 918400        |
| value_loss         | 3.5443275     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00040760456 |
| clipfrac           | 0.005703125   |
| explained_variance | 0.976         |
| fps                | 1080          |
| n_updates          | 288           |
| policy_entropy     | 0.16587439    |
| policy_loss        | -0.0012465974 |
| serial_timesteps   | 921600        |
| time_elapsed       | 913           |
| total_timesteps    | 921600        |
| value_loss         | 0.45132565    |
-------------------------

---------------------------------------
| approxkl           | 7.167e-06      |
| clipfrac           | 0.0            |
| explained_variance | 0.971          |
| fps                | 955            |
| n_updates          | 304            |
| policy_entropy     | 0.103230104    |
| policy_loss        | -0.00015421711 |
| serial_timesteps   | 972800         |
| time_elapsed       | 964            |
| total_timesteps    | 972800         |
| value_loss         | 2.990739       |
---------------------------------------
---------------------------------------
| approxkl           | 2.7654396e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.955          |
| fps                | 997            |
| n_updates          | 305            |
| policy_entropy     | 0.0742533      |
| policy_loss        | -3.5836492e-05 |
| serial_timesteps   | 976000         |
| time_elapsed       | 967            |
| total_timesteps    | 976000         |
| value_loss         | 5.5778613      |


---------------------------------------
| approxkl           | 7.0880787e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.937          |
| fps                | 1020           |
| n_updates          | 321            |
| policy_entropy     | 0.055764377    |
| policy_loss        | -1.0980256e-05 |
| serial_timesteps   | 1027200        |
| time_elapsed       | 1.02e+03       |
| total_timesteps    | 1027200        |
| value_loss         | 7.656637       |
---------------------------------------
--------------------------------------
| approxkl           | 5.9710537e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.949         |
| fps                | 1068          |
| n_updates          | 322           |
| policy_entropy     | 0.065943286   |
| policy_loss        | -9.106547e-06 |
| serial_timesteps   | 1030400       |
| time_elapsed       | 1.02e+03      |
| total_timesteps    | 1030400       |
| value_loss         | 5.9711037     |
------------

--------------------------------------
| approxkl           | 0.00025635894 |
| clipfrac           | 0.0           |
| explained_variance | 0.829         |
| fps                | 1176          |
| n_updates          | 338           |
| policy_entropy     | 0.211986      |
| policy_loss        | -7.321678e-05 |
| serial_timesteps   | 1081600       |
| time_elapsed       | 1.16e+03      |
| total_timesteps    | 1081600       |
| value_loss         | 7.569174      |
--------------------------------------
-------------------------------------
| approxkl           | 9.642322e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.855        |
| fps                | 1156         |
| n_updates          | 339          |
| policy_entropy     | 0.24219158   |
| policy_loss        | 0.0001534593 |
| serial_timesteps   | 1084800      |
| time_elapsed       | 1.16e+03     |
| total_timesteps    | 1084800      |
| value_loss         | 9.404621     |
-------------------------------------

---------------------------------------
| approxkl           | 9.6246644e-05  |
| clipfrac           | 0.00046875002  |
| explained_variance | 0.978          |
| fps                | 972            |
| n_updates          | 354            |
| policy_entropy     | 0.12593196     |
| policy_loss        | -0.00049541285 |
| serial_timesteps   | 1132800        |
| time_elapsed       | 1.51e+03       |
| total_timesteps    | 1132800        |
| value_loss         | 0.6043535      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00015993463 |
| clipfrac           | 0.000859375   |
| explained_variance | 0.952         |
| fps                | 764           |
| n_updates          | 355           |
| policy_entropy     | 0.10949943    |
| policy_loss        | 7.5222626e-05 |
| serial_timesteps   | 1136000       |
| time_elapsed       | 1.52e+03      |
| total_timesteps    | 1136000       |
| value_loss         | 3.3479536     |
------------

--------------------------------------
| approxkl           | 0.0012985116  |
| clipfrac           | 0.01578125    |
| explained_variance | -1.23         |
| fps                | 1159          |
| n_updates          | 370           |
| policy_entropy     | 0.15160127    |
| policy_loss        | -0.0021382917 |
| serial_timesteps   | 1184000       |
| time_elapsed       | 1.56e+03      |
| total_timesteps    | 1184000       |
| value_loss         | 0.071449146   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00068844366 |
| clipfrac           | 0.00796875    |
| explained_variance | -0.55         |
| fps                | 1115          |
| n_updates          | 371           |
| policy_entropy     | 0.15461126    |
| policy_loss        | -0.0013446289 |
| serial_timesteps   | 1187200       |
| time_elapsed       | 1.56e+03      |
| total_timesteps    | 1187200       |
| value_loss         | 0.107191585   |
-------------------------

--------------------------------------
| approxkl           | 0.00083461986 |
| clipfrac           | 0.010937501   |
| explained_variance | -0.845        |
| fps                | 1163          |
| n_updates          | 387           |
| policy_entropy     | 0.17282602    |
| policy_loss        | -0.0016397708 |
| serial_timesteps   | 1238400       |
| time_elapsed       | 1.61e+03      |
| total_timesteps    | 1238400       |
| value_loss         | 0.15886012    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0007464562  |
| clipfrac           | 0.009921875   |
| explained_variance | 0.543         |
| fps                | 1117          |
| n_updates          | 388           |
| policy_entropy     | 0.16942802    |
| policy_loss        | -0.0010634129 |
| serial_timesteps   | 1241600       |
| time_elapsed       | 1.61e+03      |
| total_timesteps    | 1241600       |
| value_loss         | 0.16920182    |
-------------------------

--------------------------------------
| approxkl           | 0.000113025   |
| clipfrac           | 0.00078124995 |
| explained_variance | 0.91          |
| fps                | 796           |
| n_updates          | 404           |
| policy_entropy     | 0.071781695   |
| policy_loss        | -5.689874e-05 |
| serial_timesteps   | 1292800       |
| time_elapsed       | 1.66e+03      |
| total_timesteps    | 1292800       |
| value_loss         | 6.883385      |
--------------------------------------
-------------------------------------
| approxkl           | 0.0007294818 |
| clipfrac           | 0.00859375   |
| explained_variance | -0.0893      |
| fps                | 748          |
| n_updates          | 405          |
| policy_entropy     | 0.1625542    |
| policy_loss        | -0.002123354 |
| serial_timesteps   | 1296000      |
| time_elapsed       | 1.67e+03     |
| total_timesteps    | 1296000      |
| value_loss         | 0.07776278   |
-------------------------------------

--------------------------------------
| approxkl           | 0.0010203853  |
| clipfrac           | 0.011328125   |
| explained_variance | 0.547         |
| fps                | 991           |
| n_updates          | 421           |
| policy_entropy     | 0.16315638    |
| policy_loss        | -0.0018598814 |
| serial_timesteps   | 1347200       |
| time_elapsed       | 1.72e+03      |
| total_timesteps    | 1347200       |
| value_loss         | 0.023536775   |
--------------------------------------
---------------------------------------
| approxkl           | 0.00040638127  |
| clipfrac           | 0.004765625    |
| explained_variance | 0.884          |
| fps                | 1016           |
| n_updates          | 422            |
| policy_entropy     | 0.122184515    |
| policy_loss        | -2.8369577e-06 |
| serial_timesteps   | 1350400        |
| time_elapsed       | 1.73e+03       |
| total_timesteps    | 1350400        |
| value_loss         | 4.1450872      |
-------------

---------------------------------------
| approxkl           | 0.00030493634  |
| clipfrac           | 0.00265625     |
| explained_variance | 0.853          |
| fps                | 1200           |
| n_updates          | 438            |
| policy_entropy     | 0.15928286     |
| policy_loss        | -2.7569271e-05 |
| serial_timesteps   | 1401600        |
| time_elapsed       | 1.77e+03       |
| total_timesteps    | 1401600        |
| value_loss         | 2.6774316      |
---------------------------------------
--------------------------------------
| approxkl           | 0.002066662   |
| clipfrac           | 0.021015625   |
| explained_variance | 0.0687        |
| fps                | 1164          |
| n_updates          | 439           |
| policy_entropy     | 0.15705067    |
| policy_loss        | -0.0031809057 |
| serial_timesteps   | 1404800       |
| time_elapsed       | 1.78e+03      |
| total_timesteps    | 1404800       |
| value_loss         | 0.06643283    |
------------

--------------------------------------
| approxkl           | 0.00082629686 |
| clipfrac           | 0.009140625   |
| explained_variance | 0.966         |
| fps                | 1153          |
| n_updates          | 455           |
| policy_entropy     | 0.14467466    |
| policy_loss        | 0.00031010073 |
| serial_timesteps   | 1456000       |
| time_elapsed       | 1.82e+03      |
| total_timesteps    | 1456000       |
| value_loss         | 0.5142273     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00040783812  |
| clipfrac           | 0.004921875    |
| explained_variance | 0.868          |
| fps                | 1155           |
| n_updates          | 456            |
| policy_entropy     | 0.14359543     |
| policy_loss        | -5.1646384e-05 |
| serial_timesteps   | 1459200        |
| time_elapsed       | 1.83e+03       |
| total_timesteps    | 1459200        |
| value_loss         | 3.4668093      |
-------------

--------------------------------------
| approxkl           | 0.0019703605  |
| clipfrac           | 0.027109373   |
| explained_variance | -0.101        |
| fps                | 1029          |
| n_updates          | 472           |
| policy_entropy     | 0.1545217     |
| policy_loss        | -0.0007941276 |
| serial_timesteps   | 1510400       |
| time_elapsed       | 1.88e+03      |
| total_timesteps    | 1510400       |
| value_loss         | 0.056198057   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0010782678 |
| clipfrac           | 0.01171875   |
| explained_variance | 0.856        |
| fps                | 1081         |
| n_updates          | 473          |
| policy_entropy     | 0.07157646   |
| policy_loss        | 0.0009132584 |
| serial_timesteps   | 1513600      |
| time_elapsed       | 1.89e+03     |
| total_timesteps    | 1513600      |
| value_loss         | 8.478175     |
-------------------------------------

--------------------------------------
| approxkl           | 0.0015029495  |
| clipfrac           | 0.017421875   |
| explained_variance | 0.144         |
| fps                | 1180          |
| n_updates          | 489           |
| policy_entropy     | 0.1384016     |
| policy_loss        | -0.0018744858 |
| serial_timesteps   | 1564800       |
| time_elapsed       | 1.93e+03      |
| total_timesteps    | 1564800       |
| value_loss         | 0.023851285   |
--------------------------------------
--------------------------------------
| approxkl           | 0.002056311   |
| clipfrac           | 0.02109375    |
| explained_variance | 0.212         |
| fps                | 1059          |
| n_updates          | 490           |
| policy_entropy     | 0.15611517    |
| policy_loss        | -0.0018260546 |
| serial_timesteps   | 1568000       |
| time_elapsed       | 1.93e+03      |
| total_timesteps    | 1568000       |
| value_loss         | 0.035201468   |
-------------------------

--------------------------------------
| approxkl           | 0.0021936498  |
| clipfrac           | 0.025234375   |
| explained_variance | 0.663         |
| fps                | 1113          |
| n_updates          | 506           |
| policy_entropy     | 0.13876593    |
| policy_loss        | -0.0020105937 |
| serial_timesteps   | 1619200       |
| time_elapsed       | 1.98e+03      |
| total_timesteps    | 1619200       |
| value_loss         | 0.021247683   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0027644432   |
| clipfrac           | 0.02890625     |
| explained_variance | 0.984          |
| fps                | 1071           |
| n_updates          | 507            |
| policy_entropy     | 0.121977136    |
| policy_loss        | -0.00077818136 |
| serial_timesteps   | 1622400        |
| time_elapsed       | 1.99e+03       |
| total_timesteps    | 1622400        |
| value_loss         | 0.09016793     |
-------------

--------------------------------------
| approxkl           | 7.44201e-05   |
| clipfrac           | 0.00015625    |
| explained_variance | 0.896         |
| fps                | 1094          |
| n_updates          | 523           |
| policy_entropy     | 0.08074535    |
| policy_loss        | 5.7780147e-05 |
| serial_timesteps   | 1673600       |
| time_elapsed       | 2.03e+03      |
| total_timesteps    | 1673600       |
| value_loss         | 7.302557      |
--------------------------------------
--------------------------------------
| approxkl           | 5.29337e-05   |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.954         |
| fps                | 1049          |
| n_updates          | 524           |
| policy_entropy     | 0.11490747    |
| policy_loss        | -7.465064e-05 |
| serial_timesteps   | 1676800       |
| time_elapsed       | 2.03e+03      |
| total_timesteps    | 1676800       |
| value_loss         | 1.665329      |
-------------------------

--------------------------------------
| approxkl           | 0.00055541634 |
| clipfrac           | 0.007890625   |
| explained_variance | 0.336         |
| fps                | 1154          |
| n_updates          | 540           |
| policy_entropy     | 0.13713221    |
| policy_loss        | -0.0011983052 |
| serial_timesteps   | 1728000       |
| time_elapsed       | 2.08e+03      |
| total_timesteps    | 1728000       |
| value_loss         | 0.2109918     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00048080637 |
| clipfrac           | 0.0053125     |
| explained_variance | 0.917         |
| fps                | 1178          |
| n_updates          | 541           |
| policy_entropy     | 0.07281839    |
| policy_loss        | -8.613162e-05 |
| serial_timesteps   | 1731200       |
| time_elapsed       | 2.08e+03      |
| total_timesteps    | 1731200       |
| value_loss         | 6.809321      |
-------------------------

--------------------------------------
| approxkl           | 0.0016621642  |
| clipfrac           | 0.016796876   |
| explained_variance | 0.085         |
| fps                | 1038          |
| n_updates          | 557           |
| policy_entropy     | 0.12739487    |
| policy_loss        | -0.0013105702 |
| serial_timesteps   | 1782400       |
| time_elapsed       | 2.13e+03      |
| total_timesteps    | 1782400       |
| value_loss         | 0.017237345   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0026541543  |
| clipfrac           | 0.028437499   |
| explained_variance | -0.637        |
| fps                | 1105          |
| n_updates          | 558           |
| policy_entropy     | 0.1318183     |
| policy_loss        | -0.0010513561 |
| serial_timesteps   | 1785600       |
| time_elapsed       | 2.13e+03      |
| total_timesteps    | 1785600       |
| value_loss         | 0.030584596   |
-------------------------

--------------------------------------
| approxkl           | 0.00046724526 |
| clipfrac           | 0.005859375   |
| explained_variance | 0.924         |
| fps                | 1111          |
| n_updates          | 574           |
| policy_entropy     | 0.102064215   |
| policy_loss        | 4.8048787e-06 |
| serial_timesteps   | 1836800       |
| time_elapsed       | 2.18e+03      |
| total_timesteps    | 1836800       |
| value_loss         | 3.6045482     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00034400698  |
| clipfrac           | 0.004140625    |
| explained_variance | 0.981          |
| fps                | 1078           |
| n_updates          | 575            |
| policy_entropy     | 0.12513521     |
| policy_loss        | -0.00029257813 |
| serial_timesteps   | 1840000        |
| time_elapsed       | 2.18e+03       |
| total_timesteps    | 1840000        |
| value_loss         | 0.20795786     |
-------------

--------------------------------------
| approxkl           | 0.0003779402  |
| clipfrac           | 0.00421875    |
| explained_variance | 0.924         |
| fps                | 1172          |
| n_updates          | 591           |
| policy_entropy     | 0.09696774    |
| policy_loss        | 0.00015139102 |
| serial_timesteps   | 1891200       |
| time_elapsed       | 2.23e+03      |
| total_timesteps    | 1891200       |
| value_loss         | 3.5751197     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00011510396  |
| clipfrac           | 0.00093750004  |
| explained_variance | 0.925          |
| fps                | 1159           |
| n_updates          | 592            |
| policy_entropy     | 0.09011434     |
| policy_loss        | 0.000102849604 |
| serial_timesteps   | 1894400        |
| time_elapsed       | 2.23e+03       |
| total_timesteps    | 1894400        |
| value_loss         | 3.5110188      |
-------------

--------------------------------------
| approxkl           | 3.7105223e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.901         |
| fps                | 1042          |
| n_updates          | 608           |
| policy_entropy     | 0.103853494   |
| policy_loss        | 5.507622e-05  |
| serial_timesteps   | 1945600       |
| time_elapsed       | 2.28e+03      |
| total_timesteps    | 1945600       |
| value_loss         | 5.2755623     |
--------------------------------------
---------------------------------------
| approxkl           | 1.4692588e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.892          |
| fps                | 1041           |
| n_updates          | 609            |
| policy_entropy     | 0.10714558     |
| policy_loss        | -4.8797912e-05 |
| serial_timesteps   | 1948800        |
| time_elapsed       | 2.28e+03       |
| total_timesteps    | 1948800        |
| value_loss         | 5.26824        |
-------------

---------------------------------------
| approxkl           | 0.00020680556  |
| clipfrac           | 0.002265625    |
| explained_variance | 0.915          |
| fps                | 1085           |
| n_updates          | 625            |
| policy_entropy     | 0.099524036    |
| policy_loss        | -3.8260214e-06 |
| serial_timesteps   | 2000000        |
| time_elapsed       | 2.33e+03       |
| total_timesteps    | 2000000        |
| value_loss         | 3.7142177      |
---------------------------------------
--------------------------------------
| approxkl           | 2.9501454e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.918         |
| fps                | 1116          |
| n_updates          | 626           |
| policy_entropy     | 0.06285336    |
| policy_loss        | -9.073287e-05 |
| serial_timesteps   | 2003200       |
| time_elapsed       | 2.33e+03      |
| total_timesteps    | 2003200       |
| value_loss         | 6.5503435     |
------------

--------------------------------------
| approxkl           | 0.00012147153 |
| clipfrac           | 0.00078125    |
| explained_variance | 0.917         |
| fps                | 1181          |
| n_updates          | 641           |
| policy_entropy     | 0.11179471    |
| policy_loss        | 0.00010212272 |
| serial_timesteps   | 2051200       |
| time_elapsed       | 2.38e+03      |
| total_timesteps    | 2051200       |
| value_loss         | 4.5654793     |
--------------------------------------
-------------------------------------
| approxkl           | 7.220971e-05 |
| clipfrac           | 7.8125e-05   |
| explained_variance | 0.908        |
| fps                | 1118         |
| n_updates          | 642          |
| policy_entropy     | 0.14632957   |
| policy_loss        | 3.75928e-05  |
| serial_timesteps   | 2054400      |
| time_elapsed       | 2.38e+03     |
| total_timesteps    | 2054400      |
| value_loss         | 2.5925317    |
-------------------------------------

---------------------------------------
| approxkl           | 0.00024652635  |
| clipfrac           | 0.002734375    |
| explained_variance | 0.937          |
| fps                | 1146           |
| n_updates          | 658            |
| policy_entropy     | 0.11929323     |
| policy_loss        | -5.8172944e-05 |
| serial_timesteps   | 2105600        |
| time_elapsed       | 2.42e+03       |
| total_timesteps    | 2105600        |
| value_loss         | 3.3255918      |
---------------------------------------
--------------------------------------
| approxkl           | 8.280094e-05  |
| clipfrac           | 0.00015625    |
| explained_variance | 0.941         |
| fps                | 1148          |
| n_updates          | 659           |
| policy_entropy     | 0.11446798    |
| policy_loss        | 0.00012004778 |
| serial_timesteps   | 2108800       |
| time_elapsed       | 2.43e+03      |
| total_timesteps    | 2108800       |
| value_loss         | 3.3251371     |
------------

-------------------------------------
| approxkl           | 0.0012084241 |
| clipfrac           | 0.01359375   |
| explained_variance | -0.551       |
| fps                | 1189         |
| n_updates          | 675          |
| policy_entropy     | 0.13631912   |
| policy_loss        | -0.002618686 |
| serial_timesteps   | 2160000      |
| time_elapsed       | 2.47e+03     |
| total_timesteps    | 2160000      |
| value_loss         | 0.054878704  |
-------------------------------------
---------------------------------------
| approxkl           | 0.001242649    |
| clipfrac           | 0.0153125      |
| explained_variance | -0.448         |
| fps                | 1170           |
| n_updates          | 676            |
| policy_entropy     | 0.14620876     |
| policy_loss        | -0.00088517484 |
| serial_timesteps   | 2163200        |
| time_elapsed       | 2.47e+03       |
| total_timesteps    | 2163200        |
| value_loss         | 0.07745503     |
--------------------------

------------------------------------
| approxkl           | 0.002673484 |
| clipfrac           | 0.031875    |
| explained_variance | 0.493       |
| fps                | 1163        |
| n_updates          | 692         |
| policy_entropy     | 0.155989    |
| policy_loss        | -0.0039553  |
| serial_timesteps   | 2214400     |
| time_elapsed       | 2.52e+03    |
| total_timesteps    | 2214400     |
| value_loss         | 0.058370713 |
------------------------------------
--------------------------------------
| approxkl           | 0.0011379744  |
| clipfrac           | 0.0131250005  |
| explained_variance | 0.239         |
| fps                | 1142          |
| n_updates          | 693           |
| policy_entropy     | 0.14573902    |
| policy_loss        | -0.0013813465 |
| serial_timesteps   | 2217600       |
| time_elapsed       | 2.52e+03      |
| total_timesteps    | 2217600       |
| value_loss         | 0.043898035   |
--------------------------------------
------------

---------------------------------------
| approxkl           | 0.00031118057  |
| clipfrac           | 0.00375        |
| explained_variance | 0.962          |
| fps                | 1162           |
| n_updates          | 709            |
| policy_entropy     | 0.11125866     |
| policy_loss        | -4.8534203e-05 |
| serial_timesteps   | 2268800        |
| time_elapsed       | 2.56e+03       |
| total_timesteps    | 2268800        |
| value_loss         | 2.6485496      |
---------------------------------------
---------------------------------------
| approxkl           | 0.00010600596  |
| clipfrac           | 0.000390625    |
| explained_variance | 0.275          |
| fps                | 1188           |
| n_updates          | 710            |
| policy_entropy     | 0.117043465    |
| policy_loss        | -4.3367967e-05 |
| serial_timesteps   | 2272000        |
| time_elapsed       | 2.57e+03       |
| total_timesteps    | 2272000        |
| value_loss         | 7.255129       |


--------------------------------------
| approxkl           | 0.00089326897 |
| clipfrac           | 0.010859376   |
| explained_variance | -0.447        |
| fps                | 1145          |
| n_updates          | 726           |
| policy_entropy     | 0.12903959    |
| policy_loss        | -0.0012457403 |
| serial_timesteps   | 2323200       |
| time_elapsed       | 2.61e+03      |
| total_timesteps    | 2323200       |
| value_loss         | 0.047783278   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00027201168 |
| clipfrac           | 0.00359375    |
| explained_variance | 0.955         |
| fps                | 1181          |
| n_updates          | 727           |
| policy_entropy     | 0.10116491    |
| policy_loss        | 5.6601315e-05 |
| serial_timesteps   | 2326400       |
| time_elapsed       | 2.61e+03      |
| total_timesteps    | 2326400       |
| value_loss         | 3.0492887     |
-------------------------

--------------------------------------
| approxkl           | 0.0017535565  |
| clipfrac           | 0.016562501   |
| explained_variance | 0.78          |
| fps                | 1069          |
| n_updates          | 743           |
| policy_entropy     | 0.14202559    |
| policy_loss        | -0.0015230747 |
| serial_timesteps   | 2377600       |
| time_elapsed       | 2.66e+03      |
| total_timesteps    | 2377600       |
| value_loss         | 0.054023802   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010293565  |
| clipfrac           | 0.011875      |
| explained_variance | -0.239        |
| fps                | 1083          |
| n_updates          | 744           |
| policy_entropy     | 0.118615195   |
| policy_loss        | -0.0018417123 |
| serial_timesteps   | 2380800       |
| time_elapsed       | 2.66e+03      |
| total_timesteps    | 2380800       |
| value_loss         | 0.025984462   |
-------------------------

--------------------------------------
| approxkl           | 0.0011734536  |
| clipfrac           | 0.013828125   |
| explained_variance | 0.00908       |
| fps                | 1167          |
| n_updates          | 760           |
| policy_entropy     | 0.12528312    |
| policy_loss        | -0.0019145254 |
| serial_timesteps   | 2432000       |
| time_elapsed       | 2.71e+03      |
| total_timesteps    | 2432000       |
| value_loss         | 0.049563024   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0020511465 |
| clipfrac           | 0.019765625  |
| explained_variance | 0.299        |
| fps                | 1187         |
| n_updates          | 761          |
| policy_entropy     | 0.104077905  |
| policy_loss        | 0.0006923283 |
| serial_timesteps   | 2435200      |
| time_elapsed       | 2.71e+03     |
| total_timesteps    | 2435200      |
| value_loss         | 6.917614     |
-------------------------------------

--------------------------------------
| approxkl           | 0.00073110627 |
| clipfrac           | 0.008515625   |
| explained_variance | 0.948         |
| fps                | 1200          |
| n_updates          | 777           |
| policy_entropy     | 0.09816316    |
| policy_loss        | 3.045574e-05  |
| serial_timesteps   | 2486400       |
| time_elapsed       | 2.76e+03      |
| total_timesteps    | 2486400       |
| value_loss         | 3.1363754     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00022500628 |
| clipfrac           | 0.002421875   |
| explained_variance | 0.337         |
| fps                | 1163          |
| n_updates          | 778           |
| policy_entropy     | 0.10084911    |
| policy_loss        | 0.00015795685 |
| serial_timesteps   | 2489600       |
| time_elapsed       | 2.76e+03      |
| total_timesteps    | 2489600       |
| value_loss         | 6.838389      |
-------------------------

--------------------------------------
| approxkl           | 0.00084888167 |
| clipfrac           | 0.0090625     |
| explained_variance | 0.324         |
| fps                | 981           |
| n_updates          | 794           |
| policy_entropy     | 0.11903782    |
| policy_loss        | -0.0021650167 |
| serial_timesteps   | 2540800       |
| time_elapsed       | 2.81e+03      |
| total_timesteps    | 2540800       |
| value_loss         | 0.021906782   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00028230986 |
| clipfrac           | 0.00359375    |
| explained_variance | 0.955         |
| fps                | 1033          |
| n_updates          | 795           |
| policy_entropy     | 0.073743954   |
| policy_loss        | 5.6999997e-05 |
| serial_timesteps   | 2544000       |
| time_elapsed       | 2.81e+03      |
| total_timesteps    | 2544000       |
| value_loss         | 2.944405      |
-------------------------

--------------------------------------
| approxkl           | 0.0006136995  |
| clipfrac           | 0.006796875   |
| explained_variance | -0.416        |
| fps                | 1146          |
| n_updates          | 811           |
| policy_entropy     | 0.12489915    |
| policy_loss        | -0.0018230982 |
| serial_timesteps   | 2595200       |
| time_elapsed       | 2.86e+03      |
| total_timesteps    | 2595200       |
| value_loss         | 0.0682794     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00036421465 |
| clipfrac           | 0.0042968746  |
| explained_variance | 0.935         |
| fps                | 1178          |
| n_updates          | 812           |
| policy_entropy     | 0.10315517    |
| policy_loss        | 2.5338604e-05 |
| serial_timesteps   | 2598400       |
| time_elapsed       | 2.86e+03      |
| total_timesteps    | 2598400       |
| value_loss         | 3.3722792     |
-------------------------

--------------------------------------
| approxkl           | 0.0025894318  |
| clipfrac           | 0.024609376   |
| explained_variance | 0.279         |
| fps                | 1141          |
| n_updates          | 828           |
| policy_entropy     | 0.14858589    |
| policy_loss        | -0.0012878773 |
| serial_timesteps   | 2649600       |
| time_elapsed       | 2.9e+03       |
| total_timesteps    | 2649600       |
| value_loss         | 0.042898756   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00074824964 |
| clipfrac           | 0.00953125    |
| explained_variance | 0.917         |
| fps                | 1169          |
| n_updates          | 829           |
| policy_entropy     | 0.10918252    |
| policy_loss        | 0.00018567625 |
| serial_timesteps   | 2652800       |
| time_elapsed       | 2.91e+03      |
| total_timesteps    | 2652800       |
| value_loss         | 3.4564567     |
-------------------------

--------------------------------------
| approxkl           | 0.0016985072  |
| clipfrac           | 0.01671875    |
| explained_variance | 0.648         |
| fps                | 1059          |
| n_updates          | 845           |
| policy_entropy     | 0.13735014    |
| policy_loss        | -0.0022450471 |
| serial_timesteps   | 2704000       |
| time_elapsed       | 2.95e+03      |
| total_timesteps    | 2704000       |
| value_loss         | 0.011672845   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00040977544 |
| clipfrac           | 0.004453125   |
| explained_variance | 0.907         |
| fps                | 1090          |
| n_updates          | 846           |
| policy_entropy     | 0.08830945    |
| policy_loss        | 0.00021012359 |
| serial_timesteps   | 2707200       |
| time_elapsed       | 2.96e+03      |
| total_timesteps    | 2707200       |
| value_loss         | 3.8405447     |
-------------------------

--------------------------------------
| approxkl           | 0.0018008035  |
| clipfrac           | 0.01859375    |
| explained_variance | 0.0904        |
| fps                | 1124          |
| n_updates          | 862           |
| policy_entropy     | 0.1072287     |
| policy_loss        | -0.0012842426 |
| serial_timesteps   | 2758400       |
| time_elapsed       | 3e+03         |
| total_timesteps    | 2758400       |
| value_loss         | 0.010611022   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0014502611  |
| clipfrac           | 0.015703125   |
| explained_variance | 0.0647        |
| fps                | 1101          |
| n_updates          | 863           |
| policy_entropy     | 0.099463165   |
| policy_loss        | -0.0019466211 |
| serial_timesteps   | 2761600       |
| time_elapsed       | 3e+03         |
| total_timesteps    | 2761600       |
| value_loss         | 0.02635817    |
-------------------------

--------------------------------------
| approxkl           | 0.00045831484 |
| clipfrac           | 0.005078125   |
| explained_variance | -0.298        |
| fps                | 1093          |
| n_updates          | 879           |
| policy_entropy     | 0.10648332    |
| policy_loss        | -0.0013584483 |
| serial_timesteps   | 2812800       |
| time_elapsed       | 3.05e+03      |
| total_timesteps    | 2812800       |
| value_loss         | 0.027798872   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0037896472  |
| clipfrac           | 0.0346875     |
| explained_variance | 0.46          |
| fps                | 1100          |
| n_updates          | 880           |
| policy_entropy     | 0.11859599    |
| policy_loss        | -0.0013034507 |
| serial_timesteps   | 2816000       |
| time_elapsed       | 3.05e+03      |
| total_timesteps    | 2816000       |
| value_loss         | 0.022000976   |
-------------------------

--------------------------------------
| approxkl           | 0.0008169038  |
| clipfrac           | 0.010703125   |
| explained_variance | 0.684         |
| fps                | 1096          |
| n_updates          | 896           |
| policy_entropy     | 0.13056496    |
| policy_loss        | -0.0018541185 |
| serial_timesteps   | 2867200       |
| time_elapsed       | 3.1e+03       |
| total_timesteps    | 2867200       |
| value_loss         | 0.018953603   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0023591123  |
| clipfrac           | 0.025859375   |
| explained_variance | 0.547         |
| fps                | 1112          |
| n_updates          | 897           |
| policy_entropy     | 0.12303169    |
| policy_loss        | -0.0017822322 |
| serial_timesteps   | 2870400       |
| time_elapsed       | 3.1e+03       |
| total_timesteps    | 2870400       |
| value_loss         | 0.016310077   |
-------------------------

--------------------------------------
| approxkl           | 0.0013432653  |
| clipfrac           | 0.0121875     |
| explained_variance | -0.143        |
| fps                | 1066          |
| n_updates          | 913           |
| policy_entropy     | 0.1476619     |
| policy_loss        | -0.0014855638 |
| serial_timesteps   | 2921600       |
| time_elapsed       | 3.15e+03      |
| total_timesteps    | 2921600       |
| value_loss         | 0.042150956   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00018001546 |
| clipfrac           | 0.001328125   |
| explained_variance | 0.942         |
| fps                | 1098          |
| n_updates          | 914           |
| policy_entropy     | 0.11644127    |
| policy_loss        | 8.644853e-06  |
| serial_timesteps   | 2924800       |
| time_elapsed       | 3.15e+03      |
| total_timesteps    | 2924800       |
| value_loss         | 3.236383      |
-------------------------

--------------------------------------
| approxkl           | 0.00072479475 |
| clipfrac           | 0.0071875006  |
| explained_variance | 0.587         |
| fps                | 1133          |
| n_updates          | 930           |
| policy_entropy     | 0.1449567     |
| policy_loss        | 0.000930576   |
| serial_timesteps   | 2976000       |
| time_elapsed       | 3.2e+03       |
| total_timesteps    | 2976000       |
| value_loss         | 6.1034665     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00020054815 |
| clipfrac           | 0.00203125    |
| explained_variance | 0.921         |
| fps                | 1158          |
| n_updates          | 931           |
| policy_entropy     | 0.060853485   |
| policy_loss        | 3.1519012e-05 |
| serial_timesteps   | 2979200       |
| time_elapsed       | 3.2e+03       |
| total_timesteps    | 2979200       |
| value_loss         | 6.5571055     |
-------------------------

--------------------------------------
| approxkl           | 0.001789878   |
| clipfrac           | 0.0190625     |
| explained_variance | 0.574         |
| fps                | 1108          |
| n_updates          | 947           |
| policy_entropy     | 0.13488975    |
| policy_loss        | -0.0028264034 |
| serial_timesteps   | 3030400       |
| time_elapsed       | 3.25e+03      |
| total_timesteps    | 3030400       |
| value_loss         | 0.020140681   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00033114039 |
| clipfrac           | 0.0040625     |
| explained_variance | 0.912         |
| fps                | 1069          |
| n_updates          | 948           |
| policy_entropy     | 0.06739773    |
| policy_loss        | 4.3453725e-05 |
| serial_timesteps   | 3033600       |
| time_elapsed       | 3.25e+03      |
| total_timesteps    | 3033600       |
| value_loss         | 5.927335      |
-------------------------

--------------------------------------
| approxkl           | 4.0420542e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.963         |
| fps                | 1124          |
| n_updates          | 964           |
| policy_entropy     | 0.10752417    |
| policy_loss        | -3.288746e-05 |
| serial_timesteps   | 3084800       |
| time_elapsed       | 3.29e+03      |
| total_timesteps    | 3084800       |
| value_loss         | 2.3292282     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00059968745 |
| clipfrac           | 0.009453125   |
| explained_variance | 0.874         |
| fps                | 1174          |
| n_updates          | 965           |
| policy_entropy     | 0.18279642    |
| policy_loss        | 6.8644644e-05 |
| serial_timesteps   | 3088000       |
| time_elapsed       | 3.3e+03       |
| total_timesteps    | 3088000       |
| value_loss         | 4.7865686     |
-------------------------

--------------------------------------
| approxkl           | 0.0018985451  |
| clipfrac           | 0.017031249   |
| explained_variance | 0.972         |
| fps                | 1190          |
| n_updates          | 981           |
| policy_entropy     | 0.10769118    |
| policy_loss        | 0.00029245208 |
| serial_timesteps   | 3139200       |
| time_elapsed       | 3.34e+03      |
| total_timesteps    | 3139200       |
| value_loss         | 0.80913365    |
--------------------------------------
--------------------------------------
| approxkl           | 0.00043073558 |
| clipfrac           | 0.005703125   |
| explained_variance | 0.906         |
| fps                | 1167          |
| n_updates          | 982           |
| policy_entropy     | 0.08013654    |
| policy_loss        | 0.00034270628 |
| serial_timesteps   | 3142400       |
| time_elapsed       | 3.35e+03      |
| total_timesteps    | 3142400       |
| value_loss         | 5.6054726     |
-------------------------

--------------------------------------
| approxkl           | 0.0005312639  |
| clipfrac           | 0.006953125   |
| explained_variance | 0.893         |
| fps                | 1168          |
| n_updates          | 998           |
| policy_entropy     | 0.13542597    |
| policy_loss        | -0.0013883044 |
| serial_timesteps   | 3193600       |
| time_elapsed       | 3.39e+03      |
| total_timesteps    | 3193600       |
| value_loss         | 0.12880504    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0002153187  |
| clipfrac           | 0.0021093749  |
| explained_variance | 0.948         |
| fps                | 1101          |
| n_updates          | 999           |
| policy_entropy     | 0.094640896   |
| policy_loss        | 0.00019144587 |
| serial_timesteps   | 3196800       |
| time_elapsed       | 3.39e+03      |
| total_timesteps    | 3196800       |
| value_loss         | 3.0391731     |
-------------------------

--------------------------------------
| approxkl           | 8.928959e-05  |
| clipfrac           | 0.0003125     |
| explained_variance | 0.911         |
| fps                | 1097          |
| n_updates          | 1015          |
| policy_entropy     | 0.06369417    |
| policy_loss        | -2.605468e-06 |
| serial_timesteps   | 3248000       |
| time_elapsed       | 3.44e+03      |
| total_timesteps    | 3248000       |
| value_loss         | 6.986989      |
--------------------------------------
---------------------------------------
| approxkl           | 0.00059622736  |
| clipfrac           | 0.00734375     |
| explained_variance | 0.971          |
| fps                | 1183           |
| n_updates          | 1016           |
| policy_entropy     | 0.10595438     |
| policy_loss        | -0.00036726252 |
| serial_timesteps   | 3251200        |
| time_elapsed       | 3.44e+03       |
| total_timesteps    | 3251200        |
| value_loss         | 0.23578909     |
-------------

--------------------------------------
| approxkl           | 0.00058220234 |
| clipfrac           | 0.007421875   |
| explained_variance | 0.207         |
| fps                | 1168          |
| n_updates          | 1032          |
| policy_entropy     | 0.10485876    |
| policy_loss        | -0.0012764835 |
| serial_timesteps   | 3302400       |
| time_elapsed       | 3.49e+03      |
| total_timesteps    | 3302400       |
| value_loss         | 0.025822349   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00044239685 |
| clipfrac           | 0.00484375    |
| explained_variance | 0.938         |
| fps                | 1196          |
| n_updates          | 1033          |
| policy_entropy     | 0.07692534    |
| policy_loss        | 0.00017028578 |
| serial_timesteps   | 3305600       |
| time_elapsed       | 3.49e+03      |
| total_timesteps    | 3305600       |
| value_loss         | 3.3253045     |
-------------------------

--------------------------------------
| approxkl           | 4.3572025e-05 |
| clipfrac           | 0.00015625    |
| explained_variance | 0.927         |
| fps                | 1007          |
| n_updates          | 1048          |
| policy_entropy     | 0.058487955   |
| policy_loss        | 0.0001143463  |
| serial_timesteps   | 3353600       |
| time_elapsed       | 3.53e+03      |
| total_timesteps    | 3353600       |
| value_loss         | 5.99812       |
--------------------------------------
---------------------------------------
| approxkl           | 2.5660796e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.941          |
| fps                | 1062           |
| n_updates          | 1049           |
| policy_entropy     | 0.07999531     |
| policy_loss        | -4.5694564e-05 |
| serial_timesteps   | 3356800        |
| time_elapsed       | 3.53e+03       |
| total_timesteps    | 3356800        |
| value_loss         | 2.7226691      |
-------------

---------------------------------------
| approxkl           | 0.00016338385  |
| clipfrac           | 0.0009375      |
| explained_variance | -0.34          |
| fps                | 1150           |
| n_updates          | 1064           |
| policy_entropy     | 0.11968626     |
| policy_loss        | -0.00051447033 |
| serial_timesteps   | 3404800        |
| time_elapsed       | 3.58e+03       |
| total_timesteps    | 3404800        |
| value_loss         | 0.058224596    |
---------------------------------------
--------------------------------------
| approxkl           | 0.00023493252 |
| clipfrac           | 0.001953125   |
| explained_variance | 0.953         |
| fps                | 1197          |
| n_updates          | 1065          |
| policy_entropy     | 0.07447673    |
| policy_loss        | 8.0501064e-05 |
| serial_timesteps   | 3408000       |
| time_elapsed       | 3.58e+03      |
| total_timesteps    | 3408000       |
| value_loss         | 3.074489      |
------------

---------------------------------------
| approxkl           | 1.015079e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.953          |
| fps                | 1111           |
| n_updates          | 1081           |
| policy_entropy     | 0.10868143     |
| policy_loss        | -2.9722676e-05 |
| serial_timesteps   | 3459200        |
| time_elapsed       | 3.63e+03       |
| total_timesteps    | 3459200        |
| value_loss         | 3.1905696      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0016913377  |
| clipfrac           | 0.018359374   |
| explained_variance | -2.64         |
| fps                | 1097          |
| n_updates          | 1082          |
| policy_entropy     | 0.13425143    |
| policy_loss        | -0.0018327297 |
| serial_timesteps   | 3462400       |
| time_elapsed       | 3.63e+03      |
| total_timesteps    | 3462400       |
| value_loss         | 0.08490401    |
------------

--------------------------------------
| approxkl           | 0.00151071    |
| clipfrac           | 0.01609375    |
| explained_variance | 0.933         |
| fps                | 1138          |
| n_updates          | 1098          |
| policy_entropy     | 0.08287446    |
| policy_loss        | 0.00029151514 |
| serial_timesteps   | 3513600       |
| time_elapsed       | 3.68e+03      |
| total_timesteps    | 3513600       |
| value_loss         | 3.4470236     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010003457  |
| clipfrac           | 0.0103124995  |
| explained_variance | -0.135        |
| fps                | 1105          |
| n_updates          | 1099          |
| policy_entropy     | 0.10256379    |
| policy_loss        | -0.0018545968 |
| serial_timesteps   | 3516800       |
| time_elapsed       | 3.68e+03      |
| total_timesteps    | 3516800       |
| value_loss         | 0.025385853   |
-------------------------

--------------------------------------
| approxkl           | 4.0255065e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.941         |
| fps                | 1171          |
| n_updates          | 1115          |
| policy_entropy     | 0.080307215   |
| policy_loss        | 4.2958036e-05 |
| serial_timesteps   | 3568000       |
| time_elapsed       | 3.73e+03      |
| total_timesteps    | 3568000       |
| value_loss         | 3.2177122     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0008361159  |
| clipfrac           | 0.011640625   |
| explained_variance | 0.497         |
| fps                | 1155          |
| n_updates          | 1116          |
| policy_entropy     | 0.11739941    |
| policy_loss        | -0.0018086941 |
| serial_timesteps   | 3571200       |
| time_elapsed       | 3.73e+03      |
| total_timesteps    | 3571200       |
| value_loss         | 0.01733047    |
-------------------------

--------------------------------------
| approxkl           | 0.0014249845  |
| clipfrac           | 0.01328125    |
| explained_variance | 0.514         |
| fps                | 1183          |
| n_updates          | 1132          |
| policy_entropy     | 0.08586887    |
| policy_loss        | -0.0022950713 |
| serial_timesteps   | 3622400       |
| time_elapsed       | 3.77e+03      |
| total_timesteps    | 3622400       |
| value_loss         | 0.017896969   |
--------------------------------------
--------------------------------------
| approxkl           | 0.002056364   |
| clipfrac           | 0.019140625   |
| explained_variance | 0.157         |
| fps                | 1212          |
| n_updates          | 1133          |
| policy_entropy     | 0.093549445   |
| policy_loss        | -0.0018435662 |
| serial_timesteps   | 3625600       |
| time_elapsed       | 3.77e+03      |
| total_timesteps    | 3625600       |
| value_loss         | 0.02628752    |
-------------------------

--------------------------------------
| approxkl           | 0.0018725125  |
| clipfrac           | 0.01796875    |
| explained_variance | 0.265         |
| fps                | 1171          |
| n_updates          | 1149          |
| policy_entropy     | 0.10640392    |
| policy_loss        | -0.0027757464 |
| serial_timesteps   | 3676800       |
| time_elapsed       | 3.82e+03      |
| total_timesteps    | 3676800       |
| value_loss         | 0.028284648   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0008634271  |
| clipfrac           | 0.00953125    |
| explained_variance | 0.914         |
| fps                | 1136          |
| n_updates          | 1150          |
| policy_entropy     | 0.07500757    |
| policy_loss        | 0.00015704356 |
| serial_timesteps   | 3680000       |
| time_elapsed       | 3.82e+03      |
| total_timesteps    | 3680000       |
| value_loss         | 3.742785      |
-------------------------

---------------------------------------
| approxkl           | 0.00014547328  |
| clipfrac           | 0.0017968749   |
| explained_variance | 0.909          |
| fps                | 1220           |
| n_updates          | 1166           |
| policy_entropy     | 0.041025337    |
| policy_loss        | -0.00014664701 |
| serial_timesteps   | 3731200        |
| time_elapsed       | 3.87e+03       |
| total_timesteps    | 3731200        |
| value_loss         | 6.838318       |
---------------------------------------
--------------------------------------
| approxkl           | 0.00019407753 |
| clipfrac           | 0.001875      |
| explained_variance | 0.932         |
| fps                | 1184          |
| n_updates          | 1167          |
| policy_entropy     | 0.07683013    |
| policy_loss        | 6.608069e-06  |
| serial_timesteps   | 3734400       |
| time_elapsed       | 3.87e+03      |
| total_timesteps    | 3734400       |
| value_loss         | 3.2702937     |
------------

--------------------------------------
| approxkl           | 0.00043056038 |
| clipfrac           | 0.00484375    |
| explained_variance | 0.919         |
| fps                | 1183          |
| n_updates          | 1183          |
| policy_entropy     | 0.071677335   |
| policy_loss        | 0.00016136646 |
| serial_timesteps   | 3785600       |
| time_elapsed       | 3.91e+03      |
| total_timesteps    | 3785600       |
| value_loss         | 3.6585705     |
--------------------------------------
---------------------------------------
| approxkl           | 0.000106899985 |
| clipfrac           | 0.001015625    |
| explained_variance | 0.919          |
| fps                | 1163           |
| n_updates          | 1184           |
| policy_entropy     | 0.07109007     |
| policy_loss        | -0.00012825496 |
| serial_timesteps   | 3788800        |
| time_elapsed       | 3.92e+03       |
| total_timesteps    | 3788800        |
| value_loss         | 3.786237       |
-------------

--------------------------------------
| approxkl           | 0.0009074183  |
| clipfrac           | 0.01015625    |
| explained_variance | 0.922         |
| fps                | 1208          |
| n_updates          | 1200          |
| policy_entropy     | 0.07867557    |
| policy_loss        | 0.00040236473 |
| serial_timesteps   | 3840000       |
| time_elapsed       | 3.96e+03      |
| total_timesteps    | 3840000       |
| value_loss         | 3.6331058     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00019668226 |
| clipfrac           | 0.00265625    |
| explained_variance | 0.922         |
| fps                | 1208          |
| n_updates          | 1201          |
| policy_entropy     | 0.06369753    |
| policy_loss        | 8.3427134e-05 |
| serial_timesteps   | 3843200       |
| time_elapsed       | 3.97e+03      |
| total_timesteps    | 3843200       |
| value_loss         | 3.6090202     |
-------------------------

---------------------------------------
| approxkl           | 0.000113443166 |
| clipfrac           | 0.00109375     |
| explained_variance | 0.907          |
| fps                | 1193           |
| n_updates          | 1217           |
| policy_entropy     | 0.063284166    |
| policy_loss        | 0.00014055081  |
| serial_timesteps   | 3894400        |
| time_elapsed       | 4.01e+03       |
| total_timesteps    | 3894400        |
| value_loss         | 5.650913       |
---------------------------------------
--------------------------------------
| approxkl           | 0.00016459473 |
| clipfrac           | 0.00140625    |
| explained_variance | 0.178         |
| fps                | 1154          |
| n_updates          | 1218          |
| policy_entropy     | 0.093867004   |
| policy_loss        | -0.0017723024 |
| serial_timesteps   | 3897600       |
| time_elapsed       | 4.01e+03      |
| total_timesteps    | 3897600       |
| value_loss         | 0.03918542    |
------------

---------------------------------------
| approxkl           | 0.00045382424  |
| clipfrac           | 0.005859375    |
| explained_variance | 0.914          |
| fps                | 1211           |
| n_updates          | 1234           |
| policy_entropy     | 0.0733977      |
| policy_loss        | -0.00017321986 |
| serial_timesteps   | 3948800        |
| time_elapsed       | 4.06e+03       |
| total_timesteps    | 3948800        |
| value_loss         | 3.7367766      |
---------------------------------------
-------------------------------------
| approxkl           | 0.0004616542 |
| clipfrac           | 0.00546875   |
| explained_variance | -0.331       |
| fps                | 1170         |
| n_updates          | 1235         |
| policy_entropy     | 0.10323337   |
| policy_loss        | -0.00126734  |
| serial_timesteps   | 3952000      |
| time_elapsed       | 4.06e+03     |
| total_timesteps    | 3952000      |
| value_loss         | 0.02424173   |
------------------------

--------------------------------------
| approxkl           | 0.0010666966  |
| clipfrac           | 0.0150781255  |
| explained_variance | 0.0161        |
| fps                | 1181          |
| n_updates          | 1251          |
| policy_entropy     | 0.096797645   |
| policy_loss        | -0.0017775509 |
| serial_timesteps   | 4003200       |
| time_elapsed       | 4.1e+03       |
| total_timesteps    | 4003200       |
| value_loss         | 0.026948642   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010100665  |
| clipfrac           | 0.010859375   |
| explained_variance | 0.94          |
| fps                | 1102          |
| n_updates          | 1252          |
| policy_entropy     | 0.074849      |
| policy_loss        | 0.00047680057 |
| serial_timesteps   | 4006400       |
| time_elapsed       | 4.1e+03       |
| total_timesteps    | 4006400       |
| value_loss         | 3.2854705     |
-------------------------

---------------------------------------
| approxkl           | 0.00097105507  |
| clipfrac           | 0.012109375    |
| explained_variance | 0.901          |
| fps                | 1196           |
| n_updates          | 1268           |
| policy_entropy     | 0.09524701     |
| policy_loss        | -0.00019922994 |
| serial_timesteps   | 4057600        |
| time_elapsed       | 4.15e+03       |
| total_timesteps    | 4057600        |
| value_loss         | 0.38529742     |
---------------------------------------
-------------------------------------
| approxkl           | 5.139671e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.788        |
| fps                | 1188         |
| n_updates          | 1269         |
| policy_entropy     | 0.0732699    |
| policy_loss        | 0.0001716651 |
| serial_timesteps   | 4060800      |
| time_elapsed       | 4.15e+03     |
| total_timesteps    | 4060800      |
| value_loss         | 5.0439196    |
------------------------

--------------------------------------
| approxkl           | 2.2485583e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.892         |
| fps                | 1185          |
| n_updates          | 1285          |
| policy_entropy     | 0.068892315   |
| policy_loss        | 5.528405e-06  |
| serial_timesteps   | 4112000       |
| time_elapsed       | 4.19e+03      |
| total_timesteps    | 4112000       |
| value_loss         | 4.4222827     |
--------------------------------------
---------------------------------------
| approxkl           | 8.846602e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.86           |
| fps                | 1231           |
| n_updates          | 1286           |
| policy_entropy     | 0.030876772    |
| policy_loss        | -4.4713317e-05 |
| serial_timesteps   | 4115200        |
| time_elapsed       | 4.2e+03        |
| total_timesteps    | 4115200        |
| value_loss         | 11.1281        |
-------------

---------------------------------------
| approxkl           | 4.3905133e-05  |
| clipfrac           | 0.000234375    |
| explained_variance | 0.928          |
| fps                | 1176           |
| n_updates          | 1302           |
| policy_entropy     | 0.083614305    |
| policy_loss        | -0.00017103975 |
| serial_timesteps   | 4166400        |
| time_elapsed       | 4.24e+03       |
| total_timesteps    | 4166400        |
| value_loss         | 3.8792691      |
---------------------------------------
---------------------------------------
| approxkl           | 1.1786413e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.911          |
| fps                | 1187           |
| n_updates          | 1303           |
| policy_entropy     | 0.048840135    |
| policy_loss        | -1.0763146e-05 |
| serial_timesteps   | 4169600        |
| time_elapsed       | 4.24e+03       |
| total_timesteps    | 4169600        |
| value_loss         | 6.83258        |


-------------------------------------
| approxkl           | 0.0001899089 |
| clipfrac           | 0.00203125   |
| explained_variance | -9.96        |
| fps                | 1129         |
| n_updates          | 1319         |
| policy_entropy     | 0.10580932   |
| policy_loss        | 9.097092e-05 |
| serial_timesteps   | 4220800      |
| time_elapsed       | 4.29e+03     |
| total_timesteps    | 4220800      |
| value_loss         | 1.0287193    |
-------------------------------------
--------------------------------------
| approxkl           | 1.44799e-05   |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.928         |
| fps                | 1124          |
| n_updates          | 1320          |
| policy_entropy     | 0.14580593    |
| policy_loss        | 1.4434978e-05 |
| serial_timesteps   | 4224000       |
| time_elapsed       | 4.29e+03      |
| total_timesteps    | 4224000       |
| value_loss         | 6.6628647     |
--------------------------------------

-------------------------------------
| approxkl           | 0.0024535423 |
| clipfrac           | 0.03875      |
| explained_variance | 0.949        |
| fps                | 1199         |
| n_updates          | 1336         |
| policy_entropy     | 0.22119072   |
| policy_loss        | 0.0016110649 |
| serial_timesteps   | 4275200      |
| time_elapsed       | 4.33e+03     |
| total_timesteps    | 4275200      |
| value_loss         | 6.263772     |
-------------------------------------
---------------------------------------
| approxkl           | 0.00019987248  |
| clipfrac           | 0.0046875      |
| explained_variance | 0.783          |
| fps                | 1194           |
| n_updates          | 1337           |
| policy_entropy     | 0.17019656     |
| policy_loss        | -1.5475005e-05 |
| serial_timesteps   | 4278400        |
| time_elapsed       | 4.34e+03       |
| total_timesteps    | 4278400        |
| value_loss         | 5.6539545      |
--------------------------

---------------------------------------
| approxkl           | 0.00046246708  |
| clipfrac           | 0.001953125    |
| explained_variance | 0.953          |
| fps                | 817            |
| n_updates          | 1353           |
| policy_entropy     | 0.16222851     |
| policy_loss        | -0.00044708885 |
| serial_timesteps   | 4329600        |
| time_elapsed       | 4.39e+03       |
| total_timesteps    | 4329600        |
| value_loss         | 5.8319545      |
---------------------------------------
---------------------------------------
| approxkl           | 0.0015429878   |
| clipfrac           | 0.026328124    |
| explained_variance | 0.804          |
| fps                | 832            |
| n_updates          | 1354           |
| policy_entropy     | 0.3013178      |
| policy_loss        | -3.7160964e-05 |
| serial_timesteps   | 4332800        |
| time_elapsed       | 4.39e+03       |
| total_timesteps    | 4332800        |
| value_loss         | 10.231253      |


--------------------------------------
| approxkl           | 0.00039203287 |
| clipfrac           | 0.0           |
| explained_variance | 0.932         |
| fps                | 1175          |
| n_updates          | 1369          |
| policy_entropy     | 0.27827567    |
| policy_loss        | 0.00020580173 |
| serial_timesteps   | 4380800       |
| time_elapsed       | 4.44e+03      |
| total_timesteps    | 4380800       |
| value_loss         | 3.5622516     |
--------------------------------------
---------------------------------------
| approxkl           | 0.0002697415   |
| clipfrac           | 0.000625       |
| explained_variance | 0.913          |
| fps                | 1139           |
| n_updates          | 1370           |
| policy_entropy     | 0.1527495      |
| policy_loss        | -0.00036641114 |
| serial_timesteps   | 4384000        |
| time_elapsed       | 4.44e+03       |
| total_timesteps    | 4384000        |
| value_loss         | 0.6311551      |
-------------

---------------------------------------
| approxkl           | 0.0004741777   |
| clipfrac           | 0.00015625     |
| explained_variance | 0.922          |
| fps                | 1173           |
| n_updates          | 1386           |
| policy_entropy     | 0.23623098     |
| policy_loss        | -0.00020166137 |
| serial_timesteps   | 4435200        |
| time_elapsed       | 4.49e+03       |
| total_timesteps    | 4435200        |
| value_loss         | 8.29079        |
---------------------------------------
--------------------------------------
| approxkl           | 0.00029214637 |
| clipfrac           | 0.0025        |
| explained_variance | -11           |
| fps                | 1164          |
| n_updates          | 1387          |
| policy_entropy     | 0.14736313    |
| policy_loss        | -0.0008044327 |
| serial_timesteps   | 4438400       |
| time_elapsed       | 4.49e+03      |
| total_timesteps    | 4438400       |
| value_loss         | 0.6009338     |
------------

---------------------------------------
| approxkl           | 2.6748014e-05  |
| clipfrac           | 7.8125e-05     |
| explained_variance | 0.842          |
| fps                | 1228           |
| n_updates          | 1402           |
| policy_entropy     | 0.036682863    |
| policy_loss        | -0.00011481188 |
| serial_timesteps   | 4486400        |
| time_elapsed       | 4.53e+03       |
| total_timesteps    | 4486400        |
| value_loss         | 11.910609      |
---------------------------------------
---------------------------------------
| approxkl           | 4.3074397e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.924          |
| fps                | 1176           |
| n_updates          | 1403           |
| policy_entropy     | 0.14272028     |
| policy_loss        | -6.0133635e-07 |
| serial_timesteps   | 4489600        |
| time_elapsed       | 4.53e+03       |
| total_timesteps    | 4489600        |
| value_loss         | 1.4747248      |


--------------------------------------
| approxkl           | 0.00037909625 |
| clipfrac           | 0.003046875   |
| explained_variance | 0.88          |
| fps                | 1088          |
| n_updates          | 1418          |
| policy_entropy     | 0.12450009    |
| policy_loss        | -0.0007906327 |
| serial_timesteps   | 4537600       |
| time_elapsed       | 4.58e+03      |
| total_timesteps    | 4537600       |
| value_loss         | 4.2297025     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00010937215 |
| clipfrac           | 0.000859375   |
| explained_variance | 0.952         |
| fps                | 1156          |
| n_updates          | 1419          |
| policy_entropy     | 0.10318451    |
| policy_loss        | -0.0001434977 |
| serial_timesteps   | 4540800       |
| time_elapsed       | 4.58e+03      |
| total_timesteps    | 4540800       |
| value_loss         | 3.0751524     |
-------------------------

---------------------------------------
| approxkl           | 1.78357e-05    |
| clipfrac           | 0.0            |
| explained_variance | 0.882          |
| fps                | 1121           |
| n_updates          | 1435           |
| policy_entropy     | 0.13783619     |
| policy_loss        | -8.3213636e-05 |
| serial_timesteps   | 4592000        |
| time_elapsed       | 4.62e+03       |
| total_timesteps    | 4592000        |
| value_loss         | 4.868822       |
---------------------------------------
--------------------------------------
| approxkl           | 2.5367866e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.975         |
| fps                | 1136          |
| n_updates          | 1436          |
| policy_entropy     | 0.12227596    |
| policy_loss        | 3.495626e-05  |
| serial_timesteps   | 4595200       |
| time_elapsed       | 4.63e+03      |
| total_timesteps    | 4595200       |
| value_loss         | 2.3541694     |
------------

---------------------------------------
| approxkl           | 0.00034906564  |
| clipfrac           | 0.00390625     |
| explained_variance | 0.965          |
| fps                | 1100           |
| n_updates          | 1452           |
| policy_entropy     | 0.12522517     |
| policy_loss        | -0.00015173241 |
| serial_timesteps   | 4646400        |
| time_elapsed       | 4.68e+03       |
| total_timesteps    | 4646400        |
| value_loss         | 2.851958       |
---------------------------------------
---------------------------------------
| approxkl           | 0.00030986842  |
| clipfrac           | 0.0032812501   |
| explained_variance | -3             |
| fps                | 1070           |
| n_updates          | 1453           |
| policy_entropy     | 0.15731688     |
| policy_loss        | -0.00025474548 |
| serial_timesteps   | 4649600        |
| time_elapsed       | 4.68e+03       |
| total_timesteps    | 4649600        |
| value_loss         | 0.16271783     |


-------------------------------------
| approxkl           | 0.0049179536 |
| clipfrac           | 0.063515626  |
| explained_variance | 0.897        |
| fps                | 1088         |
| n_updates          | 1469         |
| policy_entropy     | 0.28768542   |
| policy_loss        | 0.008365275  |
| serial_timesteps   | 4700800      |
| time_elapsed       | 4.72e+03     |
| total_timesteps    | 4700800      |
| value_loss         | 4.150985     |
-------------------------------------
-------------------------------------
| approxkl           | 0.0005098843 |
| clipfrac           | 0.005390625  |
| explained_variance | 0.966        |
| fps                | 1098         |
| n_updates          | 1470         |
| policy_entropy     | 0.13145885   |
| policy_loss        | 0.0005308863 |
| serial_timesteps   | 4704000      |
| time_elapsed       | 4.73e+03     |
| total_timesteps    | 4704000      |
| value_loss         | 2.8537107    |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.00011402231 |
| clipfrac           | 0.001015625   |
| explained_variance | 0.888         |
| fps                | 1069          |
| n_updates          | 1486          |
| policy_entropy     | 0.12145337    |
| policy_loss        | 0.000291591   |
| serial_timesteps   | 4755200       |
| time_elapsed       | 4.77e+03      |
| total_timesteps    | 4755200       |
| value_loss         | 1.5265651     |
--------------------------------------
--------------------------------------
| approxkl           | 4.0615203e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.876         |
| fps                | 1122          |
| n_updates          | 1487          |
| policy_entropy     | 0.145332      |
| policy_loss        | -9.766034e-06 |
| serial_timesteps   | 4758400       |
| time_elapsed       | 4.78e+03      |
| total_timesteps    | 4758400       |
| value_loss         | 4.091963      |
-------------------------

--------------------------------------
| approxkl           | 0.00013026048 |
| clipfrac           | 0.001484375   |
| explained_variance | 0.901         |
| fps                | 1118          |
| n_updates          | 1503          |
| policy_entropy     | 0.074961826   |
| policy_loss        | -4.461769e-05 |
| serial_timesteps   | 4809600       |
| time_elapsed       | 4.82e+03      |
| total_timesteps    | 4809600       |
| value_loss         | 7.3271537     |
--------------------------------------
---------------------------------------
| approxkl           | 6.634187e-05   |
| clipfrac           | 0.000625       |
| explained_variance | 0.9            |
| fps                | 1131           |
| n_updates          | 1504           |
| policy_entropy     | 0.06671977     |
| policy_loss        | -0.00013344377 |
| serial_timesteps   | 4812800        |
| time_elapsed       | 4.83e+03       |
| total_timesteps    | 4812800        |
| value_loss         | 7.054882       |
-------------

---------------------------------------
| approxkl           | 0.00010538574  |
| clipfrac           | 7.8125e-05     |
| explained_variance | 0.914          |
| fps                | 1149           |
| n_updates          | 1520           |
| policy_entropy     | 0.17788324     |
| policy_loss        | -0.00047751443 |
| serial_timesteps   | 4864000        |
| time_elapsed       | 4.88e+03       |
| total_timesteps    | 4864000        |
| value_loss         | 3.7070453      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00031351775 |
| clipfrac           | 0.0065625     |
| explained_variance | 0.918         |
| fps                | 1102          |
| n_updates          | 1521          |
| policy_entropy     | 0.19314174    |
| policy_loss        | 0.00022279247 |
| serial_timesteps   | 4867200       |
| time_elapsed       | 4.88e+03      |
| total_timesteps    | 4867200       |
| value_loss         | 3.6255608     |
------------

--------------------------------------
| approxkl           | 0.00014769535 |
| clipfrac           | 0.000859375   |
| explained_variance | -8.5          |
| fps                | 1105          |
| n_updates          | 1536          |
| policy_entropy     | 0.153931      |
| policy_loss        | -0.0007192367 |
| serial_timesteps   | 4915200       |
| time_elapsed       | 4.92e+03      |
| total_timesteps    | 4915200       |
| value_loss         | 0.37807006    |
--------------------------------------
--------------------------------------
| approxkl           | 0.00025103166 |
| clipfrac           | 0.0028125     |
| explained_variance | 0.967         |
| fps                | 1100          |
| n_updates          | 1537          |
| policy_entropy     | 0.08893759    |
| policy_loss        | 0.00018299297 |
| serial_timesteps   | 4918400       |
| time_elapsed       | 4.93e+03      |
| total_timesteps    | 4918400       |
| value_loss         | 3.7979023     |
-------------------------

-------------------------------------
| approxkl           | 0.0026159578 |
| clipfrac           | 0.0171875    |
| explained_variance | -2.92        |
| fps                | 1082         |
| n_updates          | 1553         |
| policy_entropy     | 0.14190838   |
| policy_loss        | -0.001053312 |
| serial_timesteps   | 4969600      |
| time_elapsed       | 4.97e+03     |
| total_timesteps    | 4969600      |
| value_loss         | 0.11880896   |
-------------------------------------
--------------------------------------
| approxkl           | 0.00059405674 |
| clipfrac           | 0.005703125   |
| explained_variance | 0.957         |
| fps                | 1091          |
| n_updates          | 1554          |
| policy_entropy     | 0.11757274    |
| policy_loss        | 0.00015027083 |
| serial_timesteps   | 4972800       |
| time_elapsed       | 4.98e+03      |
| total_timesteps    | 4972800       |
| value_loss         | 2.927855      |
--------------------------------------

## Testing lesson1

In [85]:
# model_lesson1 = PPO2.load(model_names[1])
test(env_lesson1, model_lesson1, render = True, total=20)

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  20 / 20  games
Tie  0 / 20  games
Lose  0 / 20  games


## Training lesson 2
### 11x11 grid with 8 wooden boxes

In [60]:
config_lesson2 = wood_box_lesson2_env()
env_lesson2 = initialize_env(config_lesson2)

In [61]:
# model_lesson1 = PPO2.load(load_path = model_names[1],
#                           tensorboard_log = "./ppo2_pommerman_box_collect_tensorboard/")
model_lesson2 = train(model_name = model_names[2], 
                      model = model_lesson1,
                      env = env_lesson2, 
                      n_steps = n_steps, 
                      total_timesteps = total_timestep)
# model_lesson2.save(model_names[2])

--------------------------------------
| approxkl           | 4.3879052e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.929         |
| fps                | 1137          |
| n_updates          | 1             |
| policy_entropy     | 0.12292739    |
| policy_loss        | 3.6404278e-06 |
| serial_timesteps   | 3200          |
| time_elapsed       | 2.15e-06      |
| total_timesteps    | 3200          |
| value_loss         | 3.5194335     |
--------------------------------------
---------------------------------------
| approxkl           | 5.3898133e-05  |
| clipfrac           | 0.0003125      |
| explained_variance | 0.847          |
| fps                | 1179           |
| n_updates          | 2              |
| policy_entropy     | 0.13614        |
| policy_loss        | -0.00018177349 |
| serial_timesteps   | 6400           |
| time_elapsed       | 2.83           |
| total_timesteps    | 6400           |
| value_loss         | 8.935968       |
-------------

-------------------------------------
| approxkl           | 0.0017518915 |
| clipfrac           | 0.016640626  |
| explained_variance | 0.847        |
| fps                | 1091         |
| n_updates          | 18           |
| policy_entropy     | 0.18273216   |
| policy_loss        | -0.000776406 |
| serial_timesteps   | 57600        |
| time_elapsed       | 48.5         |
| total_timesteps    | 57600        |
| value_loss         | 4.6277075    |
-------------------------------------
---------------------------------------
| approxkl           | 0.00018387841  |
| clipfrac           | 0.00140625     |
| explained_variance | 0.951          |
| fps                | 1160           |
| n_updates          | 19             |
| policy_entropy     | 0.07635873     |
| policy_loss        | -2.4388362e-05 |
| serial_timesteps   | 60800          |
| time_elapsed       | 51.5           |
| total_timesteps    | 60800          |
| value_loss         | 4.8112736      |
--------------------------

--------------------------------------
| approxkl           | 5.6442505e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.901         |
| fps                | 1203          |
| n_updates          | 35            |
| policy_entropy     | 0.06712589    |
| policy_loss        | 3.3569486e-05 |
| serial_timesteps   | 112000        |
| time_elapsed       | 97.1          |
| total_timesteps    | 112000        |
| value_loss         | 8.173052      |
--------------------------------------
--------------------------------------
| approxkl           | 1.8778277e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.904         |
| fps                | 1134          |
| n_updates          | 36            |
| policy_entropy     | 0.11758936    |
| policy_loss        | -3.133729e-05 |
| serial_timesteps   | 115200        |
| time_elapsed       | 99.7          |
| total_timesteps    | 115200        |
| value_loss         | 4.0196047     |
-------------------------

--------------------------------------
| approxkl           | 2.5123958e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.911         |
| fps                | 1065          |
| n_updates          | 51            |
| policy_entropy     | 0.16356662    |
| policy_loss        | 7.882886e-05  |
| serial_timesteps   | 163200        |
| time_elapsed       | 142           |
| total_timesteps    | 163200        |
| value_loss         | 8.583174      |
--------------------------------------
--------------------------------------
| approxkl           | 9.908534e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.953         |
| fps                | 1012          |
| n_updates          | 52            |
| policy_entropy     | 0.07980837    |
| policy_loss        | -7.385791e-05 |
| serial_timesteps   | 166400        |
| time_elapsed       | 145           |
| total_timesteps    | 166400        |
| value_loss         | 5.535645      |
-------------------------

---------------------------------------
| approxkl           | 7.3347096e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.92           |
| fps                | 1132           |
| n_updates          | 68             |
| policy_entropy     | 0.084151104    |
| policy_loss        | -2.7946346e-05 |
| serial_timesteps   | 217600         |
| time_elapsed       | 194            |
| total_timesteps    | 217600         |
| value_loss         | 6.6775417      |
---------------------------------------
---------------------------------------
| approxkl           | 1.3396701e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.938          |
| fps                | 1105           |
| n_updates          | 69             |
| policy_entropy     | 0.11674805     |
| policy_loss        | -5.5667086e-05 |
| serial_timesteps   | 220800         |
| time_elapsed       | 197            |
| total_timesteps    | 220800         |
| value_loss         | 3.5916903      |


---------------------------------------
| approxkl           | 0.00017646179  |
| clipfrac           | 0.00203125     |
| explained_variance | 0.91           |
| fps                | 1140           |
| n_updates          | 85             |
| policy_entropy     | 0.077447176    |
| policy_loss        | -3.2037646e-05 |
| serial_timesteps   | 272000         |
| time_elapsed       | 242            |
| total_timesteps    | 272000         |
| value_loss         | 10.128815      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00014561797 |
| clipfrac           | 0.000859375   |
| explained_variance | -4.19         |
| fps                | 1132          |
| n_updates          | 86            |
| policy_entropy     | 0.15487075    |
| policy_loss        | -0.0004383324 |
| serial_timesteps   | 275200        |
| time_elapsed       | 245           |
| total_timesteps    | 275200        |
| value_loss         | 0.67982656    |
------------

--------------------------------------
| approxkl           | 3.1893407e-05 |
| clipfrac           | 0.00015625    |
| explained_variance | 0.88          |
| fps                | 1063          |
| n_updates          | 102           |
| policy_entropy     | 0.05247568    |
| policy_loss        | 6.3608586e-06 |
| serial_timesteps   | 326400        |
| time_elapsed       | 293           |
| total_timesteps    | 326400        |
| value_loss         | 10.042165     |
--------------------------------------
---------------------------------------
| approxkl           | 7.685697e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.963          |
| fps                | 1038           |
| n_updates          | 103            |
| policy_entropy     | 0.20022257     |
| policy_loss        | -0.00018442377 |
| serial_timesteps   | 329600         |
| time_elapsed       | 296            |
| total_timesteps    | 329600         |
| value_loss         | 4.2970357      |
-------------

---------------------------------------
| approxkl           | 4.2972657e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.933          |
| fps                | 1097           |
| n_updates          | 119            |
| policy_entropy     | 0.15126704     |
| policy_loss        | -0.00012960169 |
| serial_timesteps   | 380800         |
| time_elapsed       | 343            |
| total_timesteps    | 380800         |
| value_loss         | 1.098037       |
---------------------------------------
---------------------------------------
| approxkl           | 0.00017297929  |
| clipfrac           | 7.8125e-05     |
| explained_variance | 0.938          |
| fps                | 1113           |
| n_updates          | 120            |
| policy_entropy     | 0.12466282     |
| policy_loss        | -0.00019570373 |
| serial_timesteps   | 384000         |
| time_elapsed       | 346            |
| total_timesteps    | 384000         |
| value_loss         | 8.948064       |


--------------------------------------
| approxkl           | 0.0012047063  |
| clipfrac           | 0.012421875   |
| explained_variance | 0.967         |
| fps                | 1098          |
| n_updates          | 136           |
| policy_entropy     | 0.22702104    |
| policy_loss        | -0.0009540202 |
| serial_timesteps   | 435200        |
| time_elapsed       | 391           |
| total_timesteps    | 435200        |
| value_loss         | 1.1313388     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00010567988 |
| clipfrac           | 0.0           |
| explained_variance | 0.945         |
| fps                | 1138          |
| n_updates          | 137           |
| policy_entropy     | 0.15776877    |
| policy_loss        | -6.478951e-05 |
| serial_timesteps   | 438400        |
| time_elapsed       | 394           |
| total_timesteps    | 438400        |
| value_loss         | 5.4436293     |
-------------------------

--------------------------------------
| approxkl           | 0.00018114416 |
| clipfrac           | 0.0           |
| explained_variance | 0.937         |
| fps                | 1191          |
| n_updates          | 152           |
| policy_entropy     | 0.21818188    |
| policy_loss        | 9.819999e-05  |
| serial_timesteps   | 486400        |
| time_elapsed       | 435           |
| total_timesteps    | 486400        |
| value_loss         | 8.286078      |
--------------------------------------
---------------------------------------
| approxkl           | 6.072098e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.94           |
| fps                | 1150           |
| n_updates          | 153            |
| policy_entropy     | 0.16001146     |
| policy_loss        | -0.00011559511 |
| serial_timesteps   | 489600         |
| time_elapsed       | 438            |
| total_timesteps    | 489600         |
| value_loss         | 3.691827       |
-------------

---------------------------------------
| approxkl           | 4.2717243e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.959          |
| fps                | 1050           |
| n_updates          | 169            |
| policy_entropy     | 0.11121853     |
| policy_loss        | -4.1849917e-06 |
| serial_timesteps   | 540800         |
| time_elapsed       | 483            |
| total_timesteps    | 540800         |
| value_loss         | 5.319988       |
---------------------------------------
---------------------------------------
| approxkl           | 0.00014173285  |
| clipfrac           | 0.0            |
| explained_variance | 0.962          |
| fps                | 1107           |
| n_updates          | 170            |
| policy_entropy     | 0.17874174     |
| policy_loss        | -0.00024000107 |
| serial_timesteps   | 544000         |
| time_elapsed       | 486            |
| total_timesteps    | 544000         |
| value_loss         | 3.685996       |


--------------------------------------
| approxkl           | 0.0012514264  |
| clipfrac           | 0.0165625     |
| explained_variance | 0.917         |
| fps                | 1092          |
| n_updates          | 185           |
| policy_entropy     | 0.20186087    |
| policy_loss        | -0.0026219548 |
| serial_timesteps   | 592000        |
| time_elapsed       | 532           |
| total_timesteps    | 592000        |
| value_loss         | 6.59661       |
--------------------------------------
--------------------------------------
| approxkl           | 0.00016159427 |
| clipfrac           | 0.0003125     |
| explained_variance | 0.95          |
| fps                | 1043          |
| n_updates          | 186           |
| policy_entropy     | 0.12980705    |
| policy_loss        | 6.380513e-05  |
| serial_timesteps   | 595200        |
| time_elapsed       | 535           |
| total_timesteps    | 595200        |
| value_loss         | 5.608485      |
-------------------------

--------------------------------------
| approxkl           | 9.519554e-05  |
| clipfrac           | 0.000234375   |
| explained_variance | 0.961         |
| fps                | 1053          |
| n_updates          | 201           |
| policy_entropy     | 0.1222024     |
| policy_loss        | -8.506924e-06 |
| serial_timesteps   | 643200        |
| time_elapsed       | 581           |
| total_timesteps    | 643200        |
| value_loss         | 3.2330325     |
--------------------------------------
---------------------------------------
| approxkl           | 5.6666624e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.963          |
| fps                | 1024           |
| n_updates          | 202            |
| policy_entropy     | 0.11629668     |
| policy_loss        | -0.00011696085 |
| serial_timesteps   | 646400         |
| time_elapsed       | 584            |
| total_timesteps    | 646400         |
| value_loss         | 3.0816123      |
-------------

---------------------------------------
| approxkl           | 3.5883444e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.931          |
| fps                | 1159           |
| n_updates          | 217            |
| policy_entropy     | 0.16865663     |
| policy_loss        | -0.00044303507 |
| serial_timesteps   | 694400         |
| time_elapsed       | 626            |
| total_timesteps    | 694400         |
| value_loss         | 3.8436887      |
---------------------------------------
---------------------------------------
| approxkl           | 7.576068e-05   |
| clipfrac           | 0.000234375    |
| explained_variance | 0.938          |
| fps                | 1226           |
| n_updates          | 218            |
| policy_entropy     | 0.0960379      |
| policy_loss        | -0.00014268473 |
| serial_timesteps   | 697600         |
| time_elapsed       | 629            |
| total_timesteps    | 697600         |
| value_loss         | 6.4834967      |


--------------------------------------
| approxkl           | 4.2008225e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.954         |
| fps                | 1146          |
| n_updates          | 233           |
| policy_entropy     | 0.21852447    |
| policy_loss        | -6.830216e-05 |
| serial_timesteps   | 745600        |
| time_elapsed       | 670           |
| total_timesteps    | 745600        |
| value_loss         | 3.2434535     |
--------------------------------------
---------------------------------------
| approxkl           | 9.82359e-06    |
| clipfrac           | 0.0            |
| explained_variance | 0.909          |
| fps                | 1070           |
| n_updates          | 234            |
| policy_entropy     | 0.20469779     |
| policy_loss        | -3.1134487e-06 |
| serial_timesteps   | 748800         |
| time_elapsed       | 673            |
| total_timesteps    | 748800         |
| value_loss         | 8.73467        |
-------------

--------------------------------------
| approxkl           | 1.842228e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.961         |
| fps                | 1082          |
| n_updates          | 249           |
| policy_entropy     | 0.13336982    |
| policy_loss        | 2.9096753e-05 |
| serial_timesteps   | 796800        |
| time_elapsed       | 717           |
| total_timesteps    | 796800        |
| value_loss         | 3.3851175     |
--------------------------------------
--------------------------------------
| approxkl           | 6.485912e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.941         |
| fps                | 1036          |
| n_updates          | 250           |
| policy_entropy     | 0.19222578    |
| policy_loss        | -6.219782e-05 |
| serial_timesteps   | 800000        |
| time_elapsed       | 720           |
| total_timesteps    | 800000        |
| value_loss         | 6.110301      |
-------------------------

--------------------------------------
| approxkl           | 0.00012740938 |
| clipfrac           | 0.0           |
| explained_variance | 0.93          |
| fps                | 1060          |
| n_updates          | 266           |
| policy_entropy     | 0.14900467    |
| policy_loss        | 0.00017021023 |
| serial_timesteps   | 851200        |
| time_elapsed       | 769           |
| total_timesteps    | 851200        |
| value_loss         | 8.175226      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00019880747 |
| clipfrac           | 0.00046875    |
| explained_variance | 0.846         |
| fps                | 1067          |
| n_updates          | 267           |
| policy_entropy     | 0.1967706     |
| policy_loss        | -9.764195e-05 |
| serial_timesteps   | 854400        |
| time_elapsed       | 772           |
| total_timesteps    | 854400        |
| value_loss         | 5.996275      |
-------------------------

---------------------------------------
| approxkl           | 0.0001386128   |
| clipfrac           | 0.0014843751   |
| explained_variance | 0.945          |
| fps                | 1094           |
| n_updates          | 283            |
| policy_entropy     | 0.104001395    |
| policy_loss        | -0.00035508233 |
| serial_timesteps   | 905600         |
| time_elapsed       | 819            |
| total_timesteps    | 905600         |
| value_loss         | 6.9650044      |
---------------------------------------
---------------------------------------
| approxkl           | 4.003717e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.938          |
| fps                | 1090           |
| n_updates          | 284            |
| policy_entropy     | 0.09852991     |
| policy_loss        | -0.00011076167 |
| serial_timesteps   | 908800         |
| time_elapsed       | 822            |
| total_timesteps    | 908800         |
| value_loss         | 6.152686       |


--------------------------------------
| approxkl           | 0.00069728144 |
| clipfrac           | 0.00984375    |
| explained_variance | 0.954         |
| fps                | 1109          |
| n_updates          | 299           |
| policy_entropy     | 0.13455355    |
| policy_loss        | 0.0003790938  |
| serial_timesteps   | 956800        |
| time_elapsed       | 866           |
| total_timesteps    | 956800        |
| value_loss         | 3.8277435     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00015546872  |
| clipfrac           | 0.000546875    |
| explained_variance | 0.937          |
| fps                | 1078           |
| n_updates          | 300            |
| policy_entropy     | 0.14656417     |
| policy_loss        | -0.00010678306 |
| serial_timesteps   | 960000         |
| time_elapsed       | 869            |
| total_timesteps    | 960000         |
| value_loss         | 6.2418556      |
-------------

--------------------------------------
| approxkl           | 9.0858164e-05 |
| clipfrac           | 0.000234375   |
| explained_variance | 0.855         |
| fps                | 1121          |
| n_updates          | 316           |
| policy_entropy     | 0.15196541    |
| policy_loss        | 0.00011470303 |
| serial_timesteps   | 1011200       |
| time_elapsed       | 915           |
| total_timesteps    | 1011200       |
| value_loss         | 5.0656004     |
--------------------------------------
--------------------------------------
| approxkl           | 3.8826132e-05 |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.931         |
| fps                | 1158          |
| n_updates          | 317           |
| policy_entropy     | 0.21876945    |
| policy_loss        | 1.2551995e-05 |
| serial_timesteps   | 1014400       |
| time_elapsed       | 918           |
| total_timesteps    | 1014400       |
| value_loss         | 6.34434       |
-------------------------

--------------------------------------
| approxkl           | 3.0479625e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.894         |
| fps                | 1200          |
| n_updates          | 333           |
| policy_entropy     | 0.05895619    |
| policy_loss        | -5.820006e-05 |
| serial_timesteps   | 1065600       |
| time_elapsed       | 962           |
| total_timesteps    | 1065600       |
| value_loss         | 10.041995     |
--------------------------------------
---------------------------------------
| approxkl           | 7.84604e-06    |
| clipfrac           | 0.0            |
| explained_variance | 0.946          |
| fps                | 1123           |
| n_updates          | 334            |
| policy_entropy     | 0.095455304    |
| policy_loss        | -1.2701526e-05 |
| serial_timesteps   | 1068800        |
| time_elapsed       | 964            |
| total_timesteps    | 1068800        |
| value_loss         | 5.6915874      |
-------------

--------------------------------------
| approxkl           | 6.9967164e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.948         |
| fps                | 1148          |
| n_updates          | 350           |
| policy_entropy     | 0.09410102    |
| policy_loss        | 6.7913534e-06 |
| serial_timesteps   | 1120000       |
| time_elapsed       | 1.01e+03      |
| total_timesteps    | 1120000       |
| value_loss         | 5.7822475     |
--------------------------------------
--------------------------------------
| approxkl           | 4.697582e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.93          |
| fps                | 1149          |
| n_updates          | 351           |
| policy_entropy     | 0.0857969     |
| policy_loss        | -2.483666e-05 |
| serial_timesteps   | 1123200       |
| time_elapsed       | 1.02e+03      |
| total_timesteps    | 1123200       |
| value_loss         | 7.745407      |
-------------------------

--------------------------------------
| approxkl           | 2.3832808e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.941         |
| fps                | 1138          |
| n_updates          | 367           |
| policy_entropy     | 0.09505767    |
| policy_loss        | 2.1830872e-05 |
| serial_timesteps   | 1174400       |
| time_elapsed       | 1.06e+03      |
| total_timesteps    | 1174400       |
| value_loss         | 6.2411723     |
--------------------------------------
-------------------------------------
| approxkl           | 2.735045e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.959        |
| fps                | 1140         |
| n_updates          | 368          |
| policy_entropy     | 0.10889617   |
| policy_loss        | 5.092651e-05 |
| serial_timesteps   | 1177600      |
| time_elapsed       | 1.06e+03     |
| total_timesteps    | 1177600      |
| value_loss         | 5.665311     |
-------------------------------------

---------------------------------------
| approxkl           | 1.8578921e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.906          |
| fps                | 1034           |
| n_updates          | 384            |
| policy_entropy     | 0.16923591     |
| policy_loss        | -3.5581066e-05 |
| serial_timesteps   | 1228800        |
| time_elapsed       | 1.11e+03       |
| total_timesteps    | 1228800        |
| value_loss         | 9.124766       |
---------------------------------------
---------------------------------------
| approxkl           | 4.8360023e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.943          |
| fps                | 1035           |
| n_updates          | 385            |
| policy_entropy     | 0.1474989      |
| policy_loss        | -1.9916408e-05 |
| serial_timesteps   | 1232000        |
| time_elapsed       | 1.12e+03       |
| total_timesteps    | 1232000        |
| value_loss         | 4.8729925      |


--------------------------------------
| approxkl           | 0.00043143617 |
| clipfrac           | 0.004296875   |
| explained_variance | -9.87         |
| fps                | 1177          |
| n_updates          | 401           |
| policy_entropy     | 0.20425226    |
| policy_loss        | -0.0007668872 |
| serial_timesteps   | 1283200       |
| time_elapsed       | 1.16e+03      |
| total_timesteps    | 1283200       |
| value_loss         | 0.86106586    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0004886979  |
| clipfrac           | 0.011015626   |
| explained_variance | 0.95          |
| fps                | 1161          |
| n_updates          | 402           |
| policy_entropy     | 0.16378702    |
| policy_loss        | 0.00082231616 |
| serial_timesteps   | 1286400       |
| time_elapsed       | 1.16e+03      |
| total_timesteps    | 1286400       |
| value_loss         | 6.054696      |
-------------------------

--------------------------------------
| approxkl           | 1.9188476e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.973         |
| fps                | 1138          |
| n_updates          | 418           |
| policy_entropy     | 0.13113993    |
| policy_loss        | -3.269896e-05 |
| serial_timesteps   | 1337600       |
| time_elapsed       | 1.21e+03      |
| total_timesteps    | 1337600       |
| value_loss         | 2.9506803     |
--------------------------------------
--------------------------------------
| approxkl           | 2.4053745e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.936         |
| fps                | 1189          |
| n_updates          | 419           |
| policy_entropy     | 0.10368577    |
| policy_loss        | 2.6543588e-05 |
| serial_timesteps   | 1340800       |
| time_elapsed       | 1.21e+03      |
| total_timesteps    | 1340800       |
| value_loss         | 6.5130973     |
-------------------------

---------------------------------------
| approxkl           | 0.00044539402  |
| clipfrac           | 0.005390625    |
| explained_variance | 0.953          |
| fps                | 1118           |
| n_updates          | 435            |
| policy_entropy     | 0.14781146     |
| policy_loss        | -8.2341096e-05 |
| serial_timesteps   | 1392000        |
| time_elapsed       | 1.26e+03       |
| total_timesteps    | 1392000        |
| value_loss         | 3.1879482      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0012151196  |
| clipfrac           | 0.025078125   |
| explained_variance | 0.946         |
| fps                | 1139          |
| n_updates          | 436           |
| policy_entropy     | 0.30980888    |
| policy_loss        | -0.0016890853 |
| serial_timesteps   | 1395200       |
| time_elapsed       | 1.26e+03      |
| total_timesteps    | 1395200       |
| value_loss         | 3.6206353     |
------------

---------------------------------------
| approxkl           | 4.1869534e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.945          |
| fps                | 1202           |
| n_updates          | 452            |
| policy_entropy     | 0.11591842     |
| policy_loss        | -4.1367486e-05 |
| serial_timesteps   | 1446400        |
| time_elapsed       | 1.3e+03        |
| total_timesteps    | 1446400        |
| value_loss         | 6.016835       |
---------------------------------------
-------------------------------------
| approxkl           | 1.645699e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.913        |
| fps                | 1140         |
| n_updates          | 453          |
| policy_entropy     | 0.15850827   |
| policy_loss        | 9.03453e-05  |
| serial_timesteps   | 1449600      |
| time_elapsed       | 1.31e+03     |
| total_timesteps    | 1449600      |
| value_loss         | 4.288434     |
------------------------

-------------------------------------
| approxkl           | 0.0006192792 |
| clipfrac           | 0.00015625   |
| explained_variance | 0.915        |
| fps                | 1071         |
| n_updates          | 469          |
| policy_entropy     | 0.23354933   |
| policy_loss        | 0.0003870986 |
| serial_timesteps   | 1500800      |
| time_elapsed       | 1.35e+03     |
| total_timesteps    | 1500800      |
| value_loss         | 7.452142     |
-------------------------------------
--------------------------------------
| approxkl           | 7.4333393e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.944         |
| fps                | 1080          |
| n_updates          | 470           |
| policy_entropy     | 0.085449494   |
| policy_loss        | -5.593099e-05 |
| serial_timesteps   | 1504000       |
| time_elapsed       | 1.36e+03      |
| total_timesteps    | 1504000       |
| value_loss         | 7.028273      |
--------------------------------------

---------------------------------------
| approxkl           | 0.0003427472   |
| clipfrac           | 0.00296875     |
| explained_variance | -11.8          |
| fps                | 1044           |
| n_updates          | 486            |
| policy_entropy     | 0.174277       |
| policy_loss        | -0.00050731987 |
| serial_timesteps   | 1555200        |
| time_elapsed       | 1.4e+03        |
| total_timesteps    | 1555200        |
| value_loss         | 0.5803207      |
---------------------------------------
---------------------------------------
| approxkl           | 0.000529668    |
| clipfrac           | 0.001484375    |
| explained_variance | 0.951          |
| fps                | 1094           |
| n_updates          | 487            |
| policy_entropy     | 0.28042674     |
| policy_loss        | -0.00064586743 |
| serial_timesteps   | 1558400        |
| time_elapsed       | 1.41e+03       |
| total_timesteps    | 1558400        |
| value_loss         | 3.2495754      |


--------------------------------------
| approxkl           | 0.00011205296 |
| clipfrac           | 0.00109375    |
| explained_variance | 0.959         |
| fps                | 1154          |
| n_updates          | 502           |
| policy_entropy     | 0.08721062    |
| policy_loss        | 2.4447441e-05 |
| serial_timesteps   | 1606400       |
| time_elapsed       | 1.45e+03      |
| total_timesteps    | 1606400       |
| value_loss         | 5.5218797     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00033068907 |
| clipfrac           | 0.00093750004 |
| explained_variance | 0.895         |
| fps                | 1182          |
| n_updates          | 503           |
| policy_entropy     | 0.23440196    |
| policy_loss        | 7.099144e-05  |
| serial_timesteps   | 1609600       |
| time_elapsed       | 1.46e+03      |
| total_timesteps    | 1609600       |
| value_loss         | 8.20612       |
-------------------------

--------------------------------------
| approxkl           | 1.0492282e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.957         |
| fps                | 1064          |
| n_updates          | 519           |
| policy_entropy     | 0.14672118    |
| policy_loss        | 2.143383e-05  |
| serial_timesteps   | 1660800       |
| time_elapsed       | 1.5e+03       |
| total_timesteps    | 1660800       |
| value_loss         | 3.3140786     |
--------------------------------------
--------------------------------------
| approxkl           | 8.933339e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.939         |
| fps                | 1103          |
| n_updates          | 520           |
| policy_entropy     | 0.09844169    |
| policy_loss        | 1.5988201e-06 |
| serial_timesteps   | 1664000       |
| time_elapsed       | 1.5e+03       |
| total_timesteps    | 1664000       |
| value_loss         | 6.0648375     |
-------------------------

--------------------------------------
| approxkl           | 3.9654587e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.956         |
| fps                | 1120          |
| n_updates          | 535           |
| policy_entropy     | 0.116097815   |
| policy_loss        | 1.1187197e-05 |
| serial_timesteps   | 1712000       |
| time_elapsed       | 1.54e+03      |
| total_timesteps    | 1712000       |
| value_loss         | 5.2769065     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00059131114 |
| clipfrac           | 0.007421875   |
| explained_variance | -8.75         |
| fps                | 1095          |
| n_updates          | 536           |
| policy_entropy     | 0.18502948    |
| policy_loss        | -0.000562101  |
| serial_timesteps   | 1715200       |
| time_elapsed       | 1.55e+03      |
| total_timesteps    | 1715200       |
| value_loss         | 0.3809954     |
-------------------------

---------------------------------------
| approxkl           | 8.030416e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.913          |
| fps                | 1142           |
| n_updates          | 551            |
| policy_entropy     | 0.118064225    |
| policy_loss        | -8.8773686e-07 |
| serial_timesteps   | 1763200        |
| time_elapsed       | 1.59e+03       |
| total_timesteps    | 1763200        |
| value_loss         | 4.440213       |
---------------------------------------
--------------------------------------
| approxkl           | 3.252893e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.901         |
| fps                | 1153          |
| n_updates          | 552           |
| policy_entropy     | 0.15650174    |
| policy_loss        | 4.7980175e-05 |
| serial_timesteps   | 1766400       |
| time_elapsed       | 1.59e+03      |
| total_timesteps    | 1766400       |
| value_loss         | 3.6773965     |
------------

--------------------------------------
| approxkl           | 2.5364432e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.946         |
| fps                | 1092          |
| n_updates          | 568           |
| policy_entropy     | 0.09691383    |
| policy_loss        | -6.914138e-08 |
| serial_timesteps   | 1817600       |
| time_elapsed       | 1.64e+03      |
| total_timesteps    | 1817600       |
| value_loss         | 6.227658      |
--------------------------------------
--------------------------------------
| approxkl           | 3.0716157e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.953         |
| fps                | 1120          |
| n_updates          | 569           |
| policy_entropy     | 0.18653527    |
| policy_loss        | 2.9795021e-05 |
| serial_timesteps   | 1820800       |
| time_elapsed       | 1.64e+03      |
| total_timesteps    | 1820800       |
| value_loss         | 5.3899446     |
-------------------------

---------------------------------------
| approxkl           | 1.1201514e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.921          |
| fps                | 852            |
| n_updates          | 585            |
| policy_entropy     | 0.0973829      |
| policy_loss        | -4.4989923e-05 |
| serial_timesteps   | 1872000        |
| time_elapsed       | 1.68e+03       |
| total_timesteps    | 1872000        |
| value_loss         | 6.62013        |
---------------------------------------
--------------------------------------
| approxkl           | 2.4049157e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.959         |
| fps                | 886           |
| n_updates          | 586           |
| policy_entropy     | 0.2417738     |
| policy_loss        | -9.603212e-05 |
| serial_timesteps   | 1875200       |
| time_elapsed       | 1.69e+03      |
| total_timesteps    | 1875200       |
| value_loss         | 5.1346903     |
------------

--------------------------------------
| approxkl           | 2.3948669e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.973         |
| fps                | 1178          |
| n_updates          | 601           |
| policy_entropy     | 0.1299598     |
| policy_loss        | 1.166463e-05  |
| serial_timesteps   | 1923200       |
| time_elapsed       | 1.73e+03      |
| total_timesteps    | 1923200       |
| value_loss         | 2.7862792     |
--------------------------------------
---------------------------------------
| approxkl           | 2.2429196e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.941          |
| fps                | 1181           |
| n_updates          | 602            |
| policy_entropy     | 0.0919643      |
| policy_loss        | -0.00012662141 |
| serial_timesteps   | 1926400        |
| time_elapsed       | 1.74e+03       |
| total_timesteps    | 1926400        |
| value_loss         | 6.152937       |
-------------

--------------------------------------
| approxkl           | 2.1490494e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.959         |
| fps                | 1115          |
| n_updates          | 618           |
| policy_entropy     | 0.10763836    |
| policy_loss        | 4.0889496e-05 |
| serial_timesteps   | 1977600       |
| time_elapsed       | 1.78e+03      |
| total_timesteps    | 1977600       |
| value_loss         | 5.390465      |
--------------------------------------
---------------------------------------
| approxkl           | 0.00014242466  |
| clipfrac           | 7.8125e-05     |
| explained_variance | 0.972          |
| fps                | 1105           |
| n_updates          | 619            |
| policy_entropy     | 0.14810205     |
| policy_loss        | -0.00019692823 |
| serial_timesteps   | 1980800        |
| time_elapsed       | 1.78e+03       |
| total_timesteps    | 1980800        |
| value_loss         | 3.6416566      |
-------------

--------------------------------------
| approxkl           | 0.0012838254  |
| clipfrac           | 0.021796875   |
| explained_variance | 0.958         |
| fps                | 1143          |
| n_updates          | 634           |
| policy_entropy     | 0.28356206    |
| policy_loss        | -0.0007828519 |
| serial_timesteps   | 2028800       |
| time_elapsed       | 1.83e+03      |
| total_timesteps    | 2028800       |
| value_loss         | 3.0352988     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00019396978  |
| clipfrac           | 0.0013281249   |
| explained_variance | 0.92           |
| fps                | 1116           |
| n_updates          | 635            |
| policy_entropy     | 0.103576854    |
| policy_loss        | -0.00010236711 |
| serial_timesteps   | 2032000        |
| time_elapsed       | 1.83e+03       |
| total_timesteps    | 2032000        |
| value_loss         | 6.871619       |
-------------

---------------------------------------
| approxkl           | 0.00017654162  |
| clipfrac           | 0.001328125    |
| explained_variance | -36            |
| fps                | 1101           |
| n_updates          | 651            |
| policy_entropy     | 0.18790095     |
| policy_loss        | -0.00015283687 |
| serial_timesteps   | 2083200        |
| time_elapsed       | 1.88e+03       |
| total_timesteps    | 2083200        |
| value_loss         | 1.3931099      |
---------------------------------------
---------------------------------------
| approxkl           | 0.00012984395  |
| clipfrac           | 0.00039062498  |
| explained_variance | 0.968          |
| fps                | 1100           |
| n_updates          | 652            |
| policy_entropy     | 0.17955184     |
| policy_loss        | -3.7975013e-05 |
| serial_timesteps   | 2086400        |
| time_elapsed       | 1.88e+03       |
| total_timesteps    | 2086400        |
| value_loss         | 3.2118897      |


---------------------------------------
| approxkl           | 0.00015319769  |
| clipfrac           | 0.001015625    |
| explained_variance | 0.954          |
| fps                | 1144           |
| n_updates          | 668            |
| policy_entropy     | 0.19475108     |
| policy_loss        | -1.8860697e-05 |
| serial_timesteps   | 2137600        |
| time_elapsed       | 1.93e+03       |
| total_timesteps    | 2137600        |
| value_loss         | 3.2281241      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0010488649  |
| clipfrac           | 0.01390625    |
| explained_variance | 0.389         |
| fps                | 1173          |
| n_updates          | 669           |
| policy_entropy     | 0.19781178    |
| policy_loss        | -0.0004204951 |
| serial_timesteps   | 2140800       |
| time_elapsed       | 1.93e+03      |
| total_timesteps    | 2140800       |
| value_loss         | 7.678512      |
------------

---------------------------------------
| approxkl           | 2.3331188e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.969          |
| fps                | 1039           |
| n_updates          | 685            |
| policy_entropy     | 0.13944215     |
| policy_loss        | -4.2031706e-06 |
| serial_timesteps   | 2192000        |
| time_elapsed       | 1.97e+03       |
| total_timesteps    | 2192000        |
| value_loss         | 3.0607443      |
---------------------------------------
---------------------------------------
| approxkl           | 5.2810137e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.91           |
| fps                | 1059           |
| n_updates          | 686            |
| policy_entropy     | 0.1613028      |
| policy_loss        | -0.00028644796 |
| serial_timesteps   | 2195200        |
| time_elapsed       | 1.98e+03       |
| total_timesteps    | 2195200        |
| value_loss         | 9.931701       |


--------------------------------------
| approxkl           | 5.862128e-05  |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.941         |
| fps                | 1113          |
| n_updates          | 701           |
| policy_entropy     | 0.1439709     |
| policy_loss        | -0.0001783231 |
| serial_timesteps   | 2243200       |
| time_elapsed       | 2.02e+03      |
| total_timesteps    | 2243200       |
| value_loss         | 4.5936413     |
--------------------------------------
--------------------------------------
| approxkl           | 4.2083135e-05 |
| clipfrac           | 0.00015625    |
| explained_variance | 0.941         |
| fps                | 1121          |
| n_updates          | 702           |
| policy_entropy     | 0.0917888     |
| policy_loss        | 3.8268714e-05 |
| serial_timesteps   | 2246400       |
| time_elapsed       | 2.02e+03      |
| total_timesteps    | 2246400       |
| value_loss         | 6.168873      |
-------------------------

--------------------------------------
| approxkl           | 3.368761e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.801         |
| fps                | 1078          |
| n_updates          | 717           |
| policy_entropy     | 0.15610506    |
| policy_loss        | 0.00016522541 |
| serial_timesteps   | 2294400       |
| time_elapsed       | 2.07e+03      |
| total_timesteps    | 2294400       |
| value_loss         | 5.718257      |
--------------------------------------
---------------------------------------
| approxkl           | 1.0448113e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.943          |
| fps                | 1116           |
| n_updates          | 718            |
| policy_entropy     | 0.06439933     |
| policy_loss        | -2.7161019e-05 |
| serial_timesteps   | 2297600        |
| time_elapsed       | 2.07e+03       |
| total_timesteps    | 2297600        |
| value_loss         | 6.6838536      |
-------------

---------------------------------------
| approxkl           | 0.00034196844  |
| clipfrac           | 0.003515625    |
| explained_variance | -0.608         |
| fps                | 1130           |
| n_updates          | 733            |
| policy_entropy     | 0.21181506     |
| policy_loss        | -0.00052262074 |
| serial_timesteps   | 2345600        |
| time_elapsed       | 2.11e+03       |
| total_timesteps    | 2345600        |
| value_loss         | 1.3580108      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00048787083 |
| clipfrac           | 0.00453125    |
| explained_variance | 0.915         |
| fps                | 1167          |
| n_updates          | 734           |
| policy_entropy     | 0.09772769    |
| policy_loss        | 0.00016157344 |
| serial_timesteps   | 2348800       |
| time_elapsed       | 2.11e+03      |
| total_timesteps    | 2348800       |
| value_loss         | 8.915248      |
------------

---------------------------------------
| approxkl           | 2.7090977e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.935          |
| fps                | 1173           |
| n_updates          | 749            |
| policy_entropy     | 0.12796348     |
| policy_loss        | -0.00010607354 |
| serial_timesteps   | 2396800        |
| time_elapsed       | 2.16e+03       |
| total_timesteps    | 2396800        |
| value_loss         | 7.6180773      |
---------------------------------------
--------------------------------------
| approxkl           | 5.6060187e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.972         |
| fps                | 1178          |
| n_updates          | 750           |
| policy_entropy     | 0.18471819    |
| policy_loss        | 6.810628e-05  |
| serial_timesteps   | 2400000       |
| time_elapsed       | 2.16e+03      |
| total_timesteps    | 2400000       |
| value_loss         | 3.6554565     |
------------

---------------------------------------
| approxkl           | 2.9174988e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.957          |
| fps                | 1062           |
| n_updates          | 765            |
| policy_entropy     | 0.12551379     |
| policy_loss        | -5.8553218e-05 |
| serial_timesteps   | 2448000        |
| time_elapsed       | 2.2e+03        |
| total_timesteps    | 2448000        |
| value_loss         | 3.184758       |
---------------------------------------
---------------------------------------
| approxkl           | 0.00052043254  |
| clipfrac           | 0.00609375     |
| explained_variance | -3.21          |
| fps                | 1065           |
| n_updates          | 766            |
| policy_entropy     | 0.17203562     |
| policy_loss        | -0.00055205735 |
| serial_timesteps   | 2451200        |
| time_elapsed       | 2.2e+03        |
| total_timesteps    | 2451200        |
| value_loss         | 0.8992125      |


---------------------------------------
| approxkl           | 0.00012683345  |
| clipfrac           | 0.00046875     |
| explained_variance | 0.968          |
| fps                | 1082           |
| n_updates          | 781            |
| policy_entropy     | 0.125514       |
| policy_loss        | -4.8005357e-05 |
| serial_timesteps   | 2499200        |
| time_elapsed       | 2.25e+03       |
| total_timesteps    | 2499200        |
| value_loss         | 4.6116314      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00061348395 |
| clipfrac           | 0.0009375     |
| explained_variance | 0.933         |
| fps                | 1159          |
| n_updates          | 782           |
| policy_entropy     | 0.26837713    |
| policy_loss        | -0.0009430223 |
| serial_timesteps   | 2502400       |
| time_elapsed       | 2.25e+03      |
| total_timesteps    | 2502400       |
| value_loss         | 6.921483      |
------------

--------------------------------------
| approxkl           | 3.2523385e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.89          |
| fps                | 1157          |
| n_updates          | 798           |
| policy_entropy     | 0.0899815     |
| policy_loss        | 5.9825332e-05 |
| serial_timesteps   | 2553600       |
| time_elapsed       | 2.3e+03       |
| total_timesteps    | 2553600       |
| value_loss         | 10.681336     |
--------------------------------------
---------------------------------------
| approxkl           | 5.833048e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.939          |
| fps                | 1150           |
| n_updates          | 799            |
| policy_entropy     | 0.20252275     |
| policy_loss        | -0.00013355614 |
| serial_timesteps   | 2556800        |
| time_elapsed       | 2.3e+03        |
| total_timesteps    | 2556800        |
| value_loss         | 3.6916037      |
-------------

---------------------------------------
| approxkl           | 1.5912388e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.944          |
| fps                | 1126           |
| n_updates          | 814            |
| policy_entropy     | 0.14291498     |
| policy_loss        | -0.00013878115 |
| serial_timesteps   | 2604800        |
| time_elapsed       | 2.34e+03       |
| total_timesteps    | 2604800        |
| value_loss         | 4.5891647      |
---------------------------------------
---------------------------------------
| approxkl           | 3.1531803e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.944          |
| fps                | 1156           |
| n_updates          | 815            |
| policy_entropy     | 0.16404186     |
| policy_loss        | -0.00016095876 |
| serial_timesteps   | 2608000        |
| time_elapsed       | 2.34e+03       |
| total_timesteps    | 2608000        |
| value_loss         | 3.6212645      |


-------------------------------------
| approxkl           | 0.0007803467 |
| clipfrac           | 0.015000001  |
| explained_variance | 0.962        |
| fps                | 1098         |
| n_updates          | 831          |
| policy_entropy     | 0.22007203   |
| policy_loss        | 6.92294e-05  |
| serial_timesteps   | 2659200      |
| time_elapsed       | 2.39e+03     |
| total_timesteps    | 2659200      |
| value_loss         | 2.5670528    |
-------------------------------------
-------------------------------------
| approxkl           | 0.008636164  |
| clipfrac           | 0.12671874   |
| explained_variance | 0.961        |
| fps                | 1110         |
| n_updates          | 832          |
| policy_entropy     | 0.3208494    |
| policy_loss        | 0.0072074593 |
| serial_timesteps   | 2662400      |
| time_elapsed       | 2.39e+03     |
| total_timesteps    | 2662400      |
| value_loss         | 5.214184     |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.0006403182  |
| clipfrac           | 0.0071093747  |
| explained_variance | -2.99         |
| fps                | 1132          |
| n_updates          | 848           |
| policy_entropy     | 0.21464033    |
| policy_loss        | -0.0007110165 |
| serial_timesteps   | 2713600       |
| time_elapsed       | 2.44e+03      |
| total_timesteps    | 2713600       |
| value_loss         | 1.0170609     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0005604147  |
| clipfrac           | 0.00578125    |
| explained_variance | 0.943         |
| fps                | 1144          |
| n_updates          | 849           |
| policy_entropy     | 0.15525037    |
| policy_loss        | -0.0004225982 |
| serial_timesteps   | 2716800       |
| time_elapsed       | 2.44e+03      |
| total_timesteps    | 2716800       |
| value_loss         | 3.4649286     |
-------------------------

--------------------------------------
| approxkl           | 0.00016853568 |
| clipfrac           | 0.00140625    |
| explained_variance | 0.973         |
| fps                | 870           |
| n_updates          | 865           |
| policy_entropy     | 0.17714863    |
| policy_loss        | -0.0003116481 |
| serial_timesteps   | 2768000       |
| time_elapsed       | 2.84e+03      |
| total_timesteps    | 2768000       |
| value_loss         | 3.1544025     |
--------------------------------------
---------------------------------------
| approxkl           | 0.0002914921   |
| clipfrac           | 0.0030468751   |
| explained_variance | 0.93           |
| fps                | 674            |
| n_updates          | 866            |
| policy_entropy     | 0.17573503     |
| policy_loss        | -0.00026486977 |
| serial_timesteps   | 2771200        |
| time_elapsed       | 2.84e+03       |
| total_timesteps    | 2771200        |
| value_loss         | 0.7397139      |
-------------

--------------------------------------
| approxkl           | 2.4067012e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.946         |
| fps                | 1156          |
| n_updates          | 882           |
| policy_entropy     | 0.13091931    |
| policy_loss        | 4.4810102e-05 |
| serial_timesteps   | 2822400       |
| time_elapsed       | 2.89e+03      |
| total_timesteps    | 2822400       |
| value_loss         | 3.6798606     |
--------------------------------------
-------------------------------------
| approxkl           | 2.244325e-05 |
| clipfrac           | 7.8125e-05   |
| explained_variance | 0.86         |
| fps                | 1068         |
| n_updates          | 883          |
| policy_entropy     | 0.14284503   |
| policy_loss        | 7.395536e-05 |
| serial_timesteps   | 2825600      |
| time_elapsed       | 2.89e+03     |
| total_timesteps    | 2825600      |
| value_loss         | 8.390606     |
-------------------------------------

--------------------------------------
| approxkl           | 4.5152272e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.895         |
| fps                | 1219          |
| n_updates          | 899           |
| policy_entropy     | 0.124091506   |
| policy_loss        | 9.156483e-06  |
| serial_timesteps   | 2876800       |
| time_elapsed       | 2.94e+03      |
| total_timesteps    | 2876800       |
| value_loss         | 6.1309667     |
--------------------------------------
--------------------------------------
| approxkl           | 3.7451427e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.89          |
| fps                | 1214          |
| n_updates          | 900           |
| policy_entropy     | 0.10073123    |
| policy_loss        | 4.69273e-05   |
| serial_timesteps   | 2880000       |
| time_elapsed       | 2.94e+03      |
| total_timesteps    | 2880000       |
| value_loss         | 11.289833     |
-------------------------

--------------------------------------
| approxkl           | 3.728183e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.942         |
| fps                | 1171          |
| n_updates          | 916           |
| policy_entropy     | 0.13269556    |
| policy_loss        | 3.2890217e-05 |
| serial_timesteps   | 2931200       |
| time_elapsed       | 2.98e+03      |
| total_timesteps    | 2931200       |
| value_loss         | 5.3628106     |
--------------------------------------
---------------------------------------
| approxkl           | 5.8085305e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.926          |
| fps                | 1145           |
| n_updates          | 917            |
| policy_entropy     | 0.16736989     |
| policy_loss        | 0.000117844414 |
| serial_timesteps   | 2934400        |
| time_elapsed       | 2.99e+03       |
| total_timesteps    | 2934400        |
| value_loss         | 6.8995028      |
-------------

---------------------------------------
| approxkl           | 3.5357392e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.964          |
| fps                | 1178           |
| n_updates          | 932            |
| policy_entropy     | 0.19265553     |
| policy_loss        | -3.2966211e-06 |
| serial_timesteps   | 2982400        |
| time_elapsed       | 3.03e+03       |
| total_timesteps    | 2982400        |
| value_loss         | 4.0448756      |
---------------------------------------
----------------------------------------
| approxkl           | 7.150428e-05    |
| clipfrac           | 0.0             |
| explained_variance | 0.96            |
| fps                | 1155            |
| n_updates          | 933             |
| policy_entropy     | 0.3189465       |
| policy_loss        | -0.000105621366 |
| serial_timesteps   | 2985600         |
| time_elapsed       | 3.03e+03        |
| total_timesteps    | 2985600         |
| value_loss         | 3.0359

--------------------------------------
| approxkl           | 2.5796082e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.934         |
| fps                | 1126          |
| n_updates          | 948           |
| policy_entropy     | 0.17927013    |
| policy_loss        | -6.453306e-05 |
| serial_timesteps   | 3033600       |
| time_elapsed       | 3.07e+03      |
| total_timesteps    | 3033600       |
| value_loss         | 7.1957445     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010066283  |
| clipfrac           | 0.011640625   |
| explained_variance | 0.978         |
| fps                | 1122          |
| n_updates          | 949           |
| policy_entropy     | 0.1858347     |
| policy_loss        | -0.0015752923 |
| serial_timesteps   | 3036800       |
| time_elapsed       | 3.07e+03      |
| total_timesteps    | 3036800       |
| value_loss         | 0.25566298    |
-------------------------

---------------------------------------
| approxkl           | 0.00010849959  |
| clipfrac           | 0.001875       |
| explained_variance | 0.923          |
| fps                | 1146           |
| n_updates          | 965            |
| policy_entropy     | 0.19195595     |
| policy_loss        | -0.00021080286 |
| serial_timesteps   | 3088000        |
| time_elapsed       | 3.12e+03       |
| total_timesteps    | 3088000        |
| value_loss         | 3.2534332      |
---------------------------------------
----------------------------------------
| approxkl           | 0.00011278571   |
| clipfrac           | 0.000390625     |
| explained_variance | 0.991           |
| fps                | 1156            |
| n_updates          | 966             |
| policy_entropy     | 0.13983285      |
| policy_loss        | -0.000120968965 |
| serial_timesteps   | 3091200         |
| time_elapsed       | 3.12e+03        |
| total_timesteps    | 3091200         |
| value_loss         | 1.1227

---------------------------------------
| approxkl           | 4.0007093e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.948          |
| fps                | 862            |
| n_updates          | 981            |
| policy_entropy     | 0.14305224     |
| policy_loss        | -2.5644917e-05 |
| serial_timesteps   | 3139200        |
| time_elapsed       | 3.17e+03       |
| total_timesteps    | 3139200        |
| value_loss         | 3.329823       |
---------------------------------------
--------------------------------------
| approxkl           | 2.4913887e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.95          |
| fps                | 867           |
| n_updates          | 982           |
| policy_entropy     | 0.14497307    |
| policy_loss        | 1.2779459e-05 |
| serial_timesteps   | 3142400       |
| time_elapsed       | 3.17e+03      |
| total_timesteps    | 3142400       |
| value_loss         | 3.4221282     |
------------

--------------------------------------
| approxkl           | 6.6683875e-05 |
| clipfrac           | 0.000390625   |
| explained_variance | 0.926         |
| fps                | 1135          |
| n_updates          | 998           |
| policy_entropy     | 0.15333529    |
| policy_loss        | 6.644413e-05  |
| serial_timesteps   | 3193600       |
| time_elapsed       | 3.23e+03      |
| total_timesteps    | 3193600       |
| value_loss         | 4.0197787     |
--------------------------------------
--------------------------------------
| approxkl           | 3.3133543e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.915         |
| fps                | 1187          |
| n_updates          | 999           |
| policy_entropy     | 0.36025894    |
| policy_loss        | -8.337449e-05 |
| serial_timesteps   | 3196800       |
| time_elapsed       | 3.23e+03      |
| total_timesteps    | 3196800       |
| value_loss         | 7.2313704     |
-------------------------

---------------------------------------
| approxkl           | 5.3036114e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.887          |
| fps                | 911            |
| n_updates          | 1015           |
| policy_entropy     | 0.18149137     |
| policy_loss        | -3.0000658e-05 |
| serial_timesteps   | 3248000        |
| time_elapsed       | 6.71e+03       |
| total_timesteps    | 3248000        |
| value_loss         | 1.4445738      |
---------------------------------------
---------------------------------------
| approxkl           | 2.2509703e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.947          |
| fps                | 903            |
| n_updates          | 1016           |
| policy_entropy     | 0.29242164     |
| policy_loss        | -1.8539282e-05 |
| serial_timesteps   | 3251200        |
| time_elapsed       | 6.72e+03       |
| total_timesteps    | 3251200        |
| value_loss         | 3.854261       |


---------------------------------------
| approxkl           | 0.00011227801  |
| clipfrac           | 0.000234375    |
| explained_variance | 0.951          |
| fps                | 1134           |
| n_updates          | 1032           |
| policy_entropy     | 0.15158999     |
| policy_loss        | -0.00022420243 |
| serial_timesteps   | 3302400        |
| time_elapsed       | 6.77e+03       |
| total_timesteps    | 3302400        |
| value_loss         | 3.4640346      |
---------------------------------------
--------------------------------------
| approxkl           | 7.550781e-05  |
| clipfrac           | 0.000234375   |
| explained_variance | 0.929         |
| fps                | 1181          |
| n_updates          | 1033          |
| policy_entropy     | 0.28003353    |
| policy_loss        | 0.00011164442 |
| serial_timesteps   | 3305600       |
| time_elapsed       | 6.77e+03      |
| total_timesteps    | 3305600       |
| value_loss         | 6.134348      |
------------

--------------------------------------
| approxkl           | 1.1733649e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.965         |
| fps                | 997           |
| n_updates          | 1049          |
| policy_entropy     | 0.15530941    |
| policy_loss        | 4.580021e-05  |
| serial_timesteps   | 3356800       |
| time_elapsed       | 6.82e+03      |
| total_timesteps    | 3356800       |
| value_loss         | 5.1047583     |
--------------------------------------
---------------------------------------
| approxkl           | 0.000110908746 |
| clipfrac           | 0.0            |
| explained_variance | 0.947          |
| fps                | 1074           |
| n_updates          | 1050           |
| policy_entropy     | 0.36986196     |
| policy_loss        | -0.00030446157 |
| serial_timesteps   | 3360000        |
| time_elapsed       | 6.82e+03       |
| total_timesteps    | 3360000        |
| value_loss         | 5.79639        |
-------------

---------------------------------------
| approxkl           | 2.3424756e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.932          |
| fps                | 1130           |
| n_updates          | 1066           |
| policy_entropy     | 0.1139137      |
| policy_loss        | -0.00010588911 |
| serial_timesteps   | 3411200        |
| time_elapsed       | 6.87e+03       |
| total_timesteps    | 3411200        |
| value_loss         | 4.22572        |
---------------------------------------
--------------------------------------
| approxkl           | 0.00011490393 |
| clipfrac           | 0.0003125     |
| explained_variance | 0.759         |
| fps                | 1116          |
| n_updates          | 1067          |
| policy_entropy     | 0.18685304    |
| policy_loss        | -3.614284e-05 |
| serial_timesteps   | 3414400       |
| time_elapsed       | 6.87e+03      |
| total_timesteps    | 3414400       |
| value_loss         | 3.9823847     |
------------

---------------------------------------
| approxkl           | 0.00011577397  |
| clipfrac           | 0.0003125      |
| explained_variance | 0.907          |
| fps                | 1053           |
| n_updates          | 1083           |
| policy_entropy     | 0.13392943     |
| policy_loss        | -0.00012502499 |
| serial_timesteps   | 3465600        |
| time_elapsed       | 6.92e+03       |
| total_timesteps    | 3465600        |
| value_loss         | 7.001243       |
---------------------------------------
--------------------------------------
| approxkl           | 8.8672605e-05 |
| clipfrac           | 0.000546875   |
| explained_variance | 0.939         |
| fps                | 1136          |
| n_updates          | 1084          |
| policy_entropy     | 0.21987529    |
| policy_loss        | 2.4101808e-05 |
| serial_timesteps   | 3468800       |
| time_elapsed       | 6.92e+03      |
| total_timesteps    | 3468800       |
| value_loss         | 3.6359766     |
------------

---------------------------------------
| approxkl           | 4.126203e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.933          |
| fps                | 1116           |
| n_updates          | 1099           |
| policy_entropy     | 0.16086508     |
| policy_loss        | -8.3100196e-05 |
| serial_timesteps   | 3516800        |
| time_elapsed       | 6.97e+03       |
| total_timesteps    | 3516800        |
| value_loss         | 6.27314        |
---------------------------------------
---------------------------------------
| approxkl           | 0.00020621376  |
| clipfrac           | 0.0            |
| explained_variance | 0.942          |
| fps                | 1078           |
| n_updates          | 1100           |
| policy_entropy     | 0.22601423     |
| policy_loss        | -0.00048890547 |
| serial_timesteps   | 3520000        |
| time_elapsed       | 6.97e+03       |
| total_timesteps    | 3520000        |
| value_loss         | 6.6418533      |


--------------------------------------
| approxkl           | 0.00085539324 |
| clipfrac           | 0.008203125   |
| explained_variance | 0.93          |
| fps                | 1030          |
| n_updates          | 1116          |
| policy_entropy     | 0.18204811    |
| policy_loss        | 8.547664e-05  |
| serial_timesteps   | 3571200       |
| time_elapsed       | 7.02e+03      |
| total_timesteps    | 3571200       |
| value_loss         | 6.7055964     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012515936  |
| clipfrac           | 0.015468749   |
| explained_variance | -0.657        |
| fps                | 1007          |
| n_updates          | 1117          |
| policy_entropy     | 0.19156875    |
| policy_loss        | -0.0013178727 |
| serial_timesteps   | 3574400       |
| time_elapsed       | 7.02e+03      |
| total_timesteps    | 3574400       |
| value_loss         | 0.78331494    |
-------------------------

---------------------------------------
| approxkl           | 0.00022371193  |
| clipfrac           | 0.000234375    |
| explained_variance | 0.975          |
| fps                | 1076           |
| n_updates          | 1133           |
| policy_entropy     | 0.21804234     |
| policy_loss        | -0.00012555465 |
| serial_timesteps   | 3625600        |
| time_elapsed       | 7.07e+03       |
| total_timesteps    | 3625600        |
| value_loss         | 1.0287945      |
---------------------------------------
---------------------------------------
| approxkl           | 0.0004101211   |
| clipfrac           | 0.00109375     |
| explained_variance | 0.956          |
| fps                | 1102           |
| n_updates          | 1134           |
| policy_entropy     | 0.27237633     |
| policy_loss        | -0.00034304045 |
| serial_timesteps   | 3628800        |
| time_elapsed       | 7.07e+03       |
| total_timesteps    | 3628800        |
| value_loss         | 5.305181       |


--------------------------------------
| approxkl           | 0.0003267129  |
| clipfrac           | 0.004140625   |
| explained_variance | 0.972         |
| fps                | 1056          |
| n_updates          | 1149          |
| policy_entropy     | 0.17268589    |
| policy_loss        | 0.00010798685 |
| serial_timesteps   | 3676800       |
| time_elapsed       | 7.12e+03      |
| total_timesteps    | 3676800       |
| value_loss         | 1.8050023     |
--------------------------------------
---------------------------------------
| approxkl           | 0.0001069712   |
| clipfrac           | 0.0            |
| explained_variance | 0.92           |
| fps                | 1042           |
| n_updates          | 1150           |
| policy_entropy     | 0.18348561     |
| policy_loss        | -0.00019591436 |
| serial_timesteps   | 3680000        |
| time_elapsed       | 7.12e+03       |
| total_timesteps    | 3680000        |
| value_loss         | 1.4755377      |
-------------

--------------------------------------
| approxkl           | 0.00020726433 |
| clipfrac           | 0.0018750001  |
| explained_variance | 0.923         |
| fps                | 1085          |
| n_updates          | 1166          |
| policy_entropy     | 0.13476503    |
| policy_loss        | -7.577594e-05 |
| serial_timesteps   | 3731200       |
| time_elapsed       | 7.17e+03      |
| total_timesteps    | 3731200       |
| value_loss         | 4.245995      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0001986484  |
| clipfrac           | 0.000546875   |
| explained_variance | 0.957         |
| fps                | 1043          |
| n_updates          | 1167          |
| policy_entropy     | 0.26586577    |
| policy_loss        | 0.00023200981 |
| serial_timesteps   | 3734400       |
| time_elapsed       | 7.18e+03      |
| total_timesteps    | 3734400       |
| value_loss         | 3.1027625     |
-------------------------

---------------------------------------
| approxkl           | 0.0005461931   |
| clipfrac           | 0.0            |
| explained_variance | 0.954          |
| fps                | 1040           |
| n_updates          | 1183           |
| policy_entropy     | 0.27478284     |
| policy_loss        | -0.00013753225 |
| serial_timesteps   | 3785600        |
| time_elapsed       | 7.23e+03       |
| total_timesteps    | 3785600        |
| value_loss         | 4.8143673      |
---------------------------------------
---------------------------------------
| approxkl           | 0.0002791206   |
| clipfrac           | 0.0            |
| explained_variance | 0.945          |
| fps                | 1051           |
| n_updates          | 1184           |
| policy_entropy     | 0.2382167      |
| policy_loss        | -0.00011814803 |
| serial_timesteps   | 3788800        |
| time_elapsed       | 7.23e+03       |
| total_timesteps    | 3788800        |
| value_loss         | 3.7462122      |


---------------------------------------
| approxkl           | 0.00035838556  |
| clipfrac           | 0.001640625    |
| explained_variance | 0.918          |
| fps                | 906            |
| n_updates          | 1200           |
| policy_entropy     | 0.20449312     |
| policy_loss        | -0.00039819436 |
| serial_timesteps   | 3840000        |
| time_elapsed       | 7.28e+03       |
| total_timesteps    | 3840000        |
| value_loss         | 5.257825       |
---------------------------------------
---------------------------------------
| approxkl           | 0.00028807347  |
| clipfrac           | 0.002578125    |
| explained_variance | 0.549          |
| fps                | 969            |
| n_updates          | 1201           |
| policy_entropy     | 0.15599158     |
| policy_loss        | -0.00026294246 |
| serial_timesteps   | 3843200        |
| time_elapsed       | 7.29e+03       |
| total_timesteps    | 3843200        |
| value_loss         | 2.2093828      |


---------------------------------------
| approxkl           | 0.0009771148   |
| clipfrac           | 0.01203125     |
| explained_variance | 0.875          |
| fps                | 934            |
| n_updates          | 1216           |
| policy_entropy     | 0.16298449     |
| policy_loss        | -0.00057161716 |
| serial_timesteps   | 3891200        |
| time_elapsed       | 7.34e+03       |
| total_timesteps    | 3891200        |
| value_loss         | 0.42514881     |
---------------------------------------
-------------------------------------
| approxkl           | 0.0007838593 |
| clipfrac           | 0.008984375  |
| explained_variance | 0.892        |
| fps                | 966          |
| n_updates          | 1217         |
| policy_entropy     | 0.10799018   |
| policy_loss        | 0.0006913232 |
| serial_timesteps   | 3894400      |
| time_elapsed       | 7.34e+03     |
| total_timesteps    | 3894400      |
| value_loss         | 8.025402     |
------------------------

---------------------------------------
| approxkl           | 0.00043425904  |
| clipfrac           | 7.8125e-05     |
| explained_variance | 0.944          |
| fps                | 1030           |
| n_updates          | 1233           |
| policy_entropy     | 0.30806404     |
| policy_loss        | -0.00030767615 |
| serial_timesteps   | 3945600        |
| time_elapsed       | 7.4e+03        |
| total_timesteps    | 3945600        |
| value_loss         | 5.6622934      |
---------------------------------------
---------------------------------------
| approxkl           | 0.0001326601   |
| clipfrac           | 0.000859375    |
| explained_variance | 0.944          |
| fps                | 1061           |
| n_updates          | 1234           |
| policy_entropy     | 0.1301941      |
| policy_loss        | -0.00049532985 |
| serial_timesteps   | 3948800        |
| time_elapsed       | 7.4e+03        |
| total_timesteps    | 3948800        |
| value_loss         | 4.350543       |


--------------------------------------
| approxkl           | 0.0044957977  |
| clipfrac           | 0.0421875     |
| explained_variance | -0.329        |
| fps                | 1001          |
| n_updates          | 1250          |
| policy_entropy     | 0.18089749    |
| policy_loss        | -0.0019619549 |
| serial_timesteps   | 4000000       |
| time_elapsed       | 7.45e+03      |
| total_timesteps    | 4000000       |
| value_loss         | 0.13139206    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0007384253  |
| clipfrac           | 0.0064843753  |
| explained_variance | 0.933         |
| fps                | 1021          |
| n_updates          | 1251          |
| policy_entropy     | 0.19083911    |
| policy_loss        | 0.00035392138 |
| serial_timesteps   | 4003200       |
| time_elapsed       | 7.45e+03      |
| total_timesteps    | 4003200       |
| value_loss         | 6.423918      |
-------------------------

--------------------------------------
| approxkl           | 0.0014298435  |
| clipfrac           | 0.015625      |
| explained_variance | 0.957         |
| fps                | 912           |
| n_updates          | 1267          |
| policy_entropy     | 0.23914143    |
| policy_loss        | 0.00046910805 |
| serial_timesteps   | 4054400       |
| time_elapsed       | 7.51e+03      |
| total_timesteps    | 4054400       |
| value_loss         | 2.9203718     |
--------------------------------------
--------------------------------------
| approxkl           | 0.002814426   |
| clipfrac           | 0.025937501   |
| explained_variance | -3.04         |
| fps                | 859           |
| n_updates          | 1268          |
| policy_entropy     | 0.15604275    |
| policy_loss        | -0.0006623004 |
| serial_timesteps   | 4057600       |
| time_elapsed       | 7.51e+03      |
| total_timesteps    | 4057600       |
| value_loss         | 0.19979218    |
-------------------------

---------------------------------------
| approxkl           | 7.183525e-05   |
| clipfrac           | 0.0003125      |
| explained_variance | 0.936          |
| fps                | 1000           |
| n_updates          | 1284           |
| policy_entropy     | 0.11571915     |
| policy_loss        | -3.0401574e-05 |
| serial_timesteps   | 4108800        |
| time_elapsed       | 7.56e+03       |
| total_timesteps    | 4108800        |
| value_loss         | 3.5231738      |
---------------------------------------
---------------------------------------
| approxkl           | 5.616163e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.922          |
| fps                | 961            |
| n_updates          | 1285           |
| policy_entropy     | 0.11431049     |
| policy_loss        | -0.00018460784 |
| serial_timesteps   | 4112000        |
| time_elapsed       | 7.57e+03       |
| total_timesteps    | 4112000        |
| value_loss         | 3.6641326      |


--------------------------------------
| approxkl           | 0.00013177437 |
| clipfrac           | 0.000546875   |
| explained_variance | 0.919         |
| fps                | 956           |
| n_updates          | 1301          |
| policy_entropy     | 0.2224108     |
| policy_loss        | -4.820362e-05 |
| serial_timesteps   | 4163200       |
| time_elapsed       | 7.62e+03      |
| total_timesteps    | 4163200       |
| value_loss         | 3.6170018     |
--------------------------------------
--------------------------------------
| approxkl           | 3.740782e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.918         |
| fps                | 959           |
| n_updates          | 1302          |
| policy_entropy     | 0.2143532     |
| policy_loss        | 3.1067506e-05 |
| serial_timesteps   | 4166400       |
| time_elapsed       | 7.62e+03      |
| total_timesteps    | 4166400       |
| value_loss         | 3.6536999     |
-------------------------

-------------------------------------
| approxkl           | 0.0030872053 |
| clipfrac           | 0.056015626  |
| explained_variance | 0.951        |
| fps                | 1000         |
| n_updates          | 1318         |
| policy_entropy     | 0.27011934   |
| policy_loss        | 0.0017157805 |
| serial_timesteps   | 4217600      |
| time_elapsed       | 7.68e+03     |
| total_timesteps    | 4217600      |
| value_loss         | 1.4485383    |
-------------------------------------
--------------------------------------
| approxkl           | 0.00023753391 |
| clipfrac           | 0.001953125   |
| explained_variance | 0.902         |
| fps                | 981           |
| n_updates          | 1319          |
| policy_entropy     | 0.34239554    |
| policy_loss        | 0.0003791838  |
| serial_timesteps   | 4220800       |
| time_elapsed       | 7.68e+03      |
| total_timesteps    | 4220800       |
| value_loss         | 5.558675      |
--------------------------------------

--------------------------------------
| approxkl           | 0.0018522129  |
| clipfrac           | 0.019375      |
| explained_variance | -1.56         |
| fps                | 966           |
| n_updates          | 1335          |
| policy_entropy     | 0.20286192    |
| policy_loss        | -0.0006811862 |
| serial_timesteps   | 4272000       |
| time_elapsed       | 7.74e+03      |
| total_timesteps    | 4272000       |
| value_loss         | 0.16759884    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012430688  |
| clipfrac           | 0.007265625   |
| explained_variance | 0.916         |
| fps                | 984           |
| n_updates          | 1336          |
| policy_entropy     | 0.31322917    |
| policy_loss        | 0.00035474048 |
| serial_timesteps   | 4275200       |
| time_elapsed       | 7.74e+03      |
| total_timesteps    | 4275200       |
| value_loss         | 3.7285419     |
-------------------------

---------------------------------------
| approxkl           | 0.00022423817  |
| clipfrac           | 0.002265625    |
| explained_variance | 0.928          |
| fps                | 975            |
| n_updates          | 1352           |
| policy_entropy     | 0.14906006     |
| policy_loss        | -0.00016666525 |
| serial_timesteps   | 4326400        |
| time_elapsed       | 7.79e+03       |
| total_timesteps    | 4326400        |
| value_loss         | 5.2839284      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00014738916 |
| clipfrac           | 0.00039062498 |
| explained_variance | 0.899         |
| fps                | 965           |
| n_updates          | 1353          |
| policy_entropy     | 0.15872158    |
| policy_loss        | -0.0001260782 |
| serial_timesteps   | 4329600       |
| time_elapsed       | 7.79e+03      |
| total_timesteps    | 4329600       |
| value_loss         | 4.1722884     |
------------

-------------------------------------
| approxkl           | 0.000507284  |
| clipfrac           | 0.005703125  |
| explained_variance | 0.183        |
| fps                | 995          |
| n_updates          | 1369         |
| policy_entropy     | 0.19507666   |
| policy_loss        | -0.000896494 |
| serial_timesteps   | 4380800      |
| time_elapsed       | 7.85e+03     |
| total_timesteps    | 4380800      |
| value_loss         | 0.36618102   |
-------------------------------------
--------------------------------------
| approxkl           | 0.0002863036  |
| clipfrac           | 0.0028125     |
| explained_variance | 0.911         |
| fps                | 1014          |
| n_updates          | 1370          |
| policy_entropy     | 0.23177156    |
| policy_loss        | 2.5571433e-05 |
| serial_timesteps   | 4384000       |
| time_elapsed       | 7.85e+03      |
| total_timesteps    | 4384000       |
| value_loss         | 3.8873606     |
--------------------------------------

--------------------------------------
| approxkl           | 0.00035908568 |
| clipfrac           | 0.00390625    |
| explained_variance | 0.896         |
| fps                | 787           |
| n_updates          | 1386          |
| policy_entropy     | 0.1822776     |
| policy_loss        | -0.0001531195 |
| serial_timesteps   | 4435200       |
| time_elapsed       | 7.91e+03      |
| total_timesteps    | 4435200       |
| value_loss         | 1.0148544     |
--------------------------------------
---------------------------------------
| approxkl           | 0.0013219616   |
| clipfrac           | 0.016796876    |
| explained_variance | -1.57          |
| fps                | 821            |
| n_updates          | 1387           |
| policy_entropy     | 0.1898226      |
| policy_loss        | -0.00086539774 |
| serial_timesteps   | 4438400        |
| time_elapsed       | 7.91e+03       |
| total_timesteps    | 4438400        |
| value_loss         | 0.21055466     |
-------------

--------------------------------------
| approxkl           | 0.0015226929  |
| clipfrac           | 0.016171874   |
| explained_variance | -2.36         |
| fps                | 836           |
| n_updates          | 1403          |
| policy_entropy     | 0.174308      |
| policy_loss        | -0.0013341361 |
| serial_timesteps   | 4489600       |
| time_elapsed       | 7.97e+03      |
| total_timesteps    | 4489600       |
| value_loss         | 0.16968513    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0013971541  |
| clipfrac           | 0.018515624   |
| explained_variance | 0.916         |
| fps                | 876           |
| n_updates          | 1404          |
| policy_entropy     | 0.2023628     |
| policy_loss        | 0.00044558285 |
| serial_timesteps   | 4492800       |
| time_elapsed       | 7.97e+03      |
| total_timesteps    | 4492800       |
| value_loss         | 3.7738137     |
-------------------------

---------------------------------------
| approxkl           | 0.00042463752  |
| clipfrac           | 0.00296875     |
| explained_variance | 0.961          |
| fps                | 988            |
| n_updates          | 1420           |
| policy_entropy     | 0.25582197     |
| policy_loss        | -0.00045850774 |
| serial_timesteps   | 4544000        |
| time_elapsed       | 8.03e+03       |
| total_timesteps    | 4544000        |
| value_loss         | 2.8064177      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00034651556 |
| clipfrac           | 0.0032812501  |
| explained_variance | 0.944         |
| fps                | 975           |
| n_updates          | 1421          |
| policy_entropy     | 0.16107494    |
| policy_loss        | -0.0001479547 |
| serial_timesteps   | 4547200       |
| time_elapsed       | 8.03e+03      |
| total_timesteps    | 4547200       |
| value_loss         | 3.2053454     |
------------

---------------------------------------
| approxkl           | 0.00016801138  |
| clipfrac           | 0.00125        |
| explained_variance | 0.937          |
| fps                | 990            |
| n_updates          | 1437           |
| policy_entropy     | 0.13569836     |
| policy_loss        | -4.1606574e-05 |
| serial_timesteps   | 4598400        |
| time_elapsed       | 8.08e+03       |
| total_timesteps    | 4598400        |
| value_loss         | 3.4360037      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00083838846 |
| clipfrac           | 0.00984375    |
| explained_variance | -3.77         |
| fps                | 989           |
| n_updates          | 1438          |
| policy_entropy     | 0.16390125    |
| policy_loss        | -0.0011900407 |
| serial_timesteps   | 4601600       |
| time_elapsed       | 8.09e+03      |
| total_timesteps    | 4601600       |
| value_loss         | 0.3062048     |
------------

--------------------------------------
| approxkl           | 7.889171e-05  |
| clipfrac           | 0.0003125     |
| explained_variance | 0.922         |
| fps                | 950           |
| n_updates          | 1454          |
| policy_entropy     | 0.14972816    |
| policy_loss        | -8.160077e-05 |
| serial_timesteps   | 4652800       |
| time_elapsed       | 8.14e+03      |
| total_timesteps    | 4652800       |
| value_loss         | 4.469141      |
--------------------------------------
----------------------------------------
| approxkl           | 6.710027e-05    |
| clipfrac           | 7.8125e-05      |
| explained_variance | 0.932           |
| fps                | 941             |
| n_updates          | 1455            |
| policy_entropy     | 0.14997981      |
| policy_loss        | -0.000102898324 |
| serial_timesteps   | 4656000         |
| time_elapsed       | 8.14e+03        |
| total_timesteps    | 4656000         |
| value_loss         | 3.9360375       |
-

-------------------------------------
| approxkl           | 3.798199e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.929        |
| fps                | 988          |
| n_updates          | 1470         |
| policy_entropy     | 0.13659368   |
| policy_loss        | 6.984413e-05 |
| serial_timesteps   | 4704000      |
| time_elapsed       | 8.19e+03     |
| total_timesteps    | 4704000      |
| value_loss         | 5.7374544    |
-------------------------------------
---------------------------------------
| approxkl           | 3.1611336e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.9            |
| fps                | 968            |
| n_updates          | 1471           |
| policy_entropy     | 0.180803       |
| policy_loss        | -0.00017024964 |
| serial_timesteps   | 4707200        |
| time_elapsed       | 8.19e+03       |
| total_timesteps    | 4707200        |
| value_loss         | 1.8993627      |
--------------------------

---------------------------------------
| approxkl           | 0.00025580236  |
| clipfrac           | 0.0015625      |
| explained_variance | 0.947          |
| fps                | 964            |
| n_updates          | 1487           |
| policy_entropy     | 0.17512608     |
| policy_loss        | -0.00026815018 |
| serial_timesteps   | 4758400        |
| time_elapsed       | 8.25e+03       |
| total_timesteps    | 4758400        |
| value_loss         | 1.8232782      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0001547979  |
| clipfrac           | 0.000859375   |
| explained_variance | 0.818         |
| fps                | 1030          |
| n_updates          | 1488          |
| policy_entropy     | 0.096240446   |
| policy_loss        | 0.00014192291 |
| serial_timesteps   | 4761600       |
| time_elapsed       | 8.25e+03      |
| total_timesteps    | 4761600       |
| value_loss         | 12.590279     |
------------

--------------------------------------
| approxkl           | 0.00030831143 |
| clipfrac           | 0.0075        |
| explained_variance | 0.918         |
| fps                | 965           |
| n_updates          | 1504          |
| policy_entropy     | 0.20607191    |
| policy_loss        | 0.00012833222 |
| serial_timesteps   | 4812800       |
| time_elapsed       | 8.3e+03       |
| total_timesteps    | 4812800       |
| value_loss         | 6.6864214     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0016886992  |
| clipfrac           | 0.02265625    |
| explained_variance | -0.309        |
| fps                | 927           |
| n_updates          | 1505          |
| policy_entropy     | 0.23873669    |
| policy_loss        | -0.0019344632 |
| serial_timesteps   | 4816000       |
| time_elapsed       | 8.3e+03       |
| total_timesteps    | 4816000       |
| value_loss         | 0.0910907     |
-------------------------

---------------------------------------
| approxkl           | 0.00033988844  |
| clipfrac           | 0.00046875002  |
| explained_variance | 0.944          |
| fps                | 940            |
| n_updates          | 1521           |
| policy_entropy     | 0.23065445     |
| policy_loss        | -0.00050101825 |
| serial_timesteps   | 4867200        |
| time_elapsed       | 8.36e+03       |
| total_timesteps    | 4867200        |
| value_loss         | 3.2020836      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00074767385 |
| clipfrac           | 0.0           |
| explained_variance | 0.962         |
| fps                | 954           |
| n_updates          | 1522          |
| policy_entropy     | 0.32902268    |
| policy_loss        | 0.00038941822 |
| serial_timesteps   | 4870400       |
| time_elapsed       | 8.36e+03      |
| total_timesteps    | 4870400       |
| value_loss         | 2.7852516     |
------------

---------------------------------------
| approxkl           | 0.00017607506  |
| clipfrac           | 0.0009375      |
| explained_variance | 0.965          |
| fps                | 908            |
| n_updates          | 1538           |
| policy_entropy     | 0.24515241     |
| policy_loss        | -6.5764485e-05 |
| serial_timesteps   | 4921600        |
| time_elapsed       | 8.42e+03       |
| total_timesteps    | 4921600        |
| value_loss         | 2.7747092      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00010434107 |
| clipfrac           | 0.000625      |
| explained_variance | 0.94          |
| fps                | 927           |
| n_updates          | 1539          |
| policy_entropy     | 0.14056572    |
| policy_loss        | 9.3327464e-05 |
| serial_timesteps   | 4924800       |
| time_elapsed       | 8.42e+03      |
| total_timesteps    | 4924800       |
| value_loss         | 3.53937       |
------------

--------------------------------------
| approxkl           | 0.00046723563 |
| clipfrac           | 0.004921875   |
| explained_variance | 0.955         |
| fps                | 1017          |
| n_updates          | 1555          |
| policy_entropy     | 0.16191944    |
| policy_loss        | 0.00035953455 |
| serial_timesteps   | 4976000       |
| time_elapsed       | 8.47e+03      |
| total_timesteps    | 4976000       |
| value_loss         | 2.9802845     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00020537365 |
| clipfrac           | 0.001484375   |
| explained_variance | 0.96          |
| fps                | 987           |
| n_updates          | 1556          |
| policy_entropy     | 0.17365478    |
| policy_loss        | 0.00013790402 |
| serial_timesteps   | 4979200       |
| time_elapsed       | 8.48e+03      |
| total_timesteps    | 4979200       |
| value_loss         | 2.3416784     |
-------------------------

## Testing lesson2

In [62]:
# model_lesson2 = PPO2.load(model_names[2])
test(env_lesson2, model_lesson2, render = False)

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Episode 20 finished
Episode 21 finished
Episode 22 finished
Episode 23 finished
Episode 24 finished
Episode 25 finished
Episode 26 finished
Episode 27 finished
Episode 28 finished
Episode 29 finished
Episode 30 finished
Episode 31 finished
Episode 32 finished
Episode 33 finished
Episode 34 finished
Episode 35 finished
Episode 36 finished
Episode 37 finished
Episode 38 finished
Episode 39 finished
Episode 40 finished
Episode 41 finished
Episode 42 finished
Episode 43 finished
Episode 44 finished
Episode 45 finished
Episode 46 finished
Episode 47 finished
Episode 48 finished
Episode 49 finished
Episode 50

## Training lesson 3
### 11x11 grid with 16 wooden boxes

In [63]:
config_lesson3 = wood_box_lesson3_env()
env_lesson3 = initialize_env(config_lesson3)

In [64]:
# model_lesson2 = PPO2.load(load_path = model_names[2],
#                           tensorboard_log = "./ppo2_pommerman_box_collect_tensorboard/")
model_lesson3 = train(model_name = model_names[3], 
                      model = model_lesson2,
                      env = env_lesson3, 
                      n_steps = n_steps, 
                      total_timesteps = total_timestep)
# model_lesson3.save(model_names[3])

---------------------------------------
| approxkl           | 2.500835e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.907          |
| fps                | 904            |
| n_updates          | 1              |
| policy_entropy     | 0.1448917      |
| policy_loss        | -0.00012760595 |
| serial_timesteps   | 3200           |
| time_elapsed       | 3.1e-06        |
| total_timesteps    | 3200           |
| value_loss         | 6.008588       |
---------------------------------------
--------------------------------------
| approxkl           | 2.6183472e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.901         |
| fps                | 961           |
| n_updates          | 2             |
| policy_entropy     | 0.18287149    |
| policy_loss        | -7.895529e-06 |
| serial_timesteps   | 6400          |
| time_elapsed       | 3.55          |
| total_timesteps    | 6400          |
| value_loss         | 7.947686      |
------------

--------------------------------------
| approxkl           | 4.4999426e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.946         |
| fps                | 963           |
| n_updates          | 18            |
| policy_entropy     | 0.17021397    |
| policy_loss        | -8.312609e-07 |
| serial_timesteps   | 57600         |
| time_elapsed       | 58            |
| total_timesteps    | 57600         |
| value_loss         | 5.6312056     |
--------------------------------------
---------------------------------------
| approxkl           | 2.2625416e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.879          |
| fps                | 961            |
| n_updates          | 19             |
| policy_entropy     | 0.119366266    |
| policy_loss        | -8.1411075e-05 |
| serial_timesteps   | 60800          |
| time_elapsed       | 61.3           |
| total_timesteps    | 60800          |
| value_loss         | 6.163397       |
-------------

---------------------------------------
| approxkl           | 4.7075846e-05  |
| clipfrac           | 7.8125e-05     |
| explained_variance | 0.97           |
| fps                | 953            |
| n_updates          | 35             |
| policy_entropy     | 0.17822075     |
| policy_loss        | -5.0592796e-05 |
| serial_timesteps   | 112000         |
| time_elapsed       | 116            |
| total_timesteps    | 112000         |
| value_loss         | 0.7152832      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00020427935 |
| clipfrac           | 0.003828125   |
| explained_variance | 0.783         |
| fps                | 1016          |
| n_updates          | 36            |
| policy_entropy     | 0.15171184    |
| policy_loss        | 7.714264e-05  |
| serial_timesteps   | 115200        |
| time_elapsed       | 119           |
| total_timesteps    | 115200        |
| value_loss         | 14.238252     |
------------

--------------------------------------
| approxkl           | 1.1485063e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.936         |
| fps                | 918           |
| n_updates          | 51            |
| policy_entropy     | 0.1287555     |
| policy_loss        | -2.175197e-05 |
| serial_timesteps   | 163200        |
| time_elapsed       | 169           |
| total_timesteps    | 163200        |
| value_loss         | 5.930783      |
--------------------------------------
--------------------------------------
| approxkl           | 3.3510783e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.932         |
| fps                | 912           |
| n_updates          | 52            |
| policy_entropy     | 0.16318238    |
| policy_loss        | -9.296634e-05 |
| serial_timesteps   | 166400        |
| time_elapsed       | 172           |
| total_timesteps    | 166400        |
| value_loss         | 6.8720345     |
-------------------------

---------------------------------------
| approxkl           | 5.768657e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.988          |
| fps                | 926            |
| n_updates          | 68             |
| policy_entropy     | 0.22195645     |
| policy_loss        | -1.4221891e-05 |
| serial_timesteps   | 217600         |
| time_elapsed       | 226            |
| total_timesteps    | 217600         |
| value_loss         | 0.65203065     |
---------------------------------------
--------------------------------------
| approxkl           | 8.953664e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.957         |
| fps                | 931           |
| n_updates          | 69            |
| policy_entropy     | 0.20705326    |
| policy_loss        | -0.0007269458 |
| serial_timesteps   | 220800        |
| time_elapsed       | 229           |
| total_timesteps    | 220800        |
| value_loss         | 2.3517802     |
------------

--------------------------------------
| approxkl           | 0.00010055547 |
| clipfrac           | 0.000390625   |
| explained_variance | -2.81         |
| fps                | 907           |
| n_updates          | 84            |
| policy_entropy     | 0.17826903    |
| policy_loss        | -0.0005684002 |
| serial_timesteps   | 268800        |
| time_elapsed       | 281           |
| total_timesteps    | 268800        |
| value_loss         | 0.16214392    |
--------------------------------------
---------------------------------------
| approxkl           | 0.00054629997  |
| clipfrac           | 0.0075         |
| explained_variance | 0.964          |
| fps                | 900            |
| n_updates          | 85             |
| policy_entropy     | 0.13322017     |
| policy_loss        | -0.00017674416 |
| serial_timesteps   | 272000         |
| time_elapsed       | 285            |
| total_timesteps    | 272000         |
| value_loss         | 2.870784       |
-------------

--------------------------------------
| approxkl           | 4.5328334e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.952         |
| fps                | 935           |
| n_updates          | 101           |
| policy_entropy     | 0.107205674   |
| policy_loss        | 2.1851806e-06 |
| serial_timesteps   | 323200        |
| time_elapsed       | 339           |
| total_timesteps    | 323200        |
| value_loss         | 5.4899306     |
--------------------------------------
----------------------------------------
| approxkl           | 1.2199655e-06   |
| clipfrac           | 0.0             |
| explained_variance | 0.942           |
| fps                | 899             |
| n_updates          | 102             |
| policy_entropy     | 0.0863762       |
| policy_loss        | -1.05856725e-05 |
| serial_timesteps   | 326400          |
| time_elapsed       | 342             |
| total_timesteps    | 326400          |
| value_loss         | 7.187031        |
-

---------------------------------------
| approxkl           | 0.00018359741  |
| clipfrac           | 0.0            |
| explained_variance | 0.943          |
| fps                | 974            |
| n_updates          | 117            |
| policy_entropy     | 0.26389304     |
| policy_loss        | -0.00045006254 |
| serial_timesteps   | 374400         |
| time_elapsed       | 393            |
| total_timesteps    | 374400         |
| value_loss         | 7.535832       |
---------------------------------------
--------------------------------------
| approxkl           | 3.3457472e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.95          |
| fps                | 979           |
| n_updates          | 118           |
| policy_entropy     | 0.18255523    |
| policy_loss        | -6.147362e-05 |
| serial_timesteps   | 377600        |
| time_elapsed       | 397           |
| total_timesteps    | 377600        |
| value_loss         | 3.6881957     |
------------

--------------------------------------
| approxkl           | 0.00015465877 |
| clipfrac           | 0.00125       |
| explained_variance | 0.952         |
| fps                | 1001          |
| n_updates          | 134           |
| policy_entropy     | 0.19592896    |
| policy_loss        | 1.8953233e-05 |
| serial_timesteps   | 428800        |
| time_elapsed       | 451           |
| total_timesteps    | 428800        |
| value_loss         | 3.2439322     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0011358022  |
| clipfrac           | 0.013828125   |
| explained_variance | -2.23         |
| fps                | 999           |
| n_updates          | 135           |
| policy_entropy     | 0.1700192     |
| policy_loss        | -0.0007261534 |
| serial_timesteps   | 432000        |
| time_elapsed       | 454           |
| total_timesteps    | 432000        |
| value_loss         | 0.17049313    |
-------------------------

---------------------------------------
| approxkl           | 6.598033e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.947          |
| fps                | 803            |
| n_updates          | 151            |
| policy_entropy     | 0.19251257     |
| policy_loss        | -4.3325348e-05 |
| serial_timesteps   | 483200         |
| time_elapsed       | 512            |
| total_timesteps    | 483200         |
| value_loss         | 6.4784055      |
---------------------------------------
---------------------------------------
| approxkl           | 3.407423e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.955          |
| fps                | 733            |
| n_updates          | 152            |
| policy_entropy     | 0.20962983     |
| policy_loss        | -7.6129436e-05 |
| serial_timesteps   | 486400         |
| time_elapsed       | 516            |
| total_timesteps    | 486400         |
| value_loss         | 4.2093143      |


--------------------------------------
| approxkl           | 2.9658282e-05 |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.951         |
| fps                | 928           |
| n_updates          | 167           |
| policy_entropy     | 0.14254256    |
| policy_loss        | 3.5868212e-05 |
| serial_timesteps   | 534400        |
| time_elapsed       | 573           |
| total_timesteps    | 534400        |
| value_loss         | 4.3189754     |
--------------------------------------
---------------------------------------
| approxkl           | 7.757148e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.964          |
| fps                | 947            |
| n_updates          | 168            |
| policy_entropy     | 0.12737429     |
| policy_loss        | -2.4763644e-05 |
| serial_timesteps   | 537600         |
| time_elapsed       | 577            |
| total_timesteps    | 537600         |
| value_loss         | 3.447396       |
-------------

-------------------------------------
| approxkl           | 5.815572e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.941        |
| fps                | 1006         |
| n_updates          | 183          |
| policy_entropy     | 0.14457852   |
| policy_loss        | 5.856529e-05 |
| serial_timesteps   | 585600       |
| time_elapsed       | 632          |
| total_timesteps    | 585600       |
| value_loss         | 2.8237996    |
-------------------------------------
---------------------------------------
| approxkl           | 0.00057979894  |
| clipfrac           | 0.0073437495   |
| explained_variance | -10.5          |
| fps                | 977            |
| n_updates          | 184            |
| policy_entropy     | 0.15839295     |
| policy_loss        | -0.00029552652 |
| serial_timesteps   | 588800         |
| time_elapsed       | 635            |
| total_timesteps    | 588800         |
| value_loss         | 0.33544147     |
--------------------------

---------------------------------------
| approxkl           | 1.9543502e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.959          |
| fps                | 1001           |
| n_updates          | 200            |
| policy_entropy     | 0.1243366      |
| policy_loss        | -4.1737403e-05 |
| serial_timesteps   | 640000         |
| time_elapsed       | 689            |
| total_timesteps    | 640000         |
| value_loss         | 3.8841796      |
---------------------------------------
--------------------------------------
| approxkl           | 6.8618065e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.941         |
| fps                | 991           |
| n_updates          | 201           |
| policy_entropy     | 0.09232141    |
| policy_loss        | 3.819391e-05  |
| serial_timesteps   | 643200        |
| time_elapsed       | 692           |
| total_timesteps    | 643200        |
| value_loss         | 6.81962       |
------------

--------------------------------------
| approxkl           | 1.4338401e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.947         |
| fps                | 937           |
| n_updates          | 217           |
| policy_entropy     | 0.08839193    |
| policy_loss        | 7.587485e-05  |
| serial_timesteps   | 694400        |
| time_elapsed       | 746           |
| total_timesteps    | 694400        |
| value_loss         | 5.6922097     |
--------------------------------------
-------------------------------------
| approxkl           | 1.366127e-06 |
| clipfrac           | 0.0          |
| explained_variance | 0.91         |
| fps                | 921          |
| n_updates          | 218          |
| policy_entropy     | 0.04724168   |
| policy_loss        | 2.293624e-05 |
| serial_timesteps   | 697600       |
| time_elapsed       | 749          |
| total_timesteps    | 697600       |
| value_loss         | 8.2012005    |
-------------------------------------

---------------------------------------
| approxkl           | 4.036397e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.944          |
| fps                | 924            |
| n_updates          | 234            |
| policy_entropy     | 0.15860009     |
| policy_loss        | -2.5970898e-05 |
| serial_timesteps   | 748800         |
| time_elapsed       | 803            |
| total_timesteps    | 748800         |
| value_loss         | 3.135343       |
---------------------------------------
---------------------------------------
| approxkl           | 9.647757e-05   |
| clipfrac           | 0.00046875     |
| explained_variance | 0.241          |
| fps                | 913            |
| n_updates          | 235            |
| policy_entropy     | 0.16091211     |
| policy_loss        | -0.00010948487 |
| serial_timesteps   | 752000         |
| time_elapsed       | 807            |
| total_timesteps    | 752000         |
| value_loss         | 4.018577       |


---------------------------------------
| approxkl           | 0.00019546678  |
| clipfrac           | 0.0023437499   |
| explained_variance | 0.974          |
| fps                | 997            |
| n_updates          | 250            |
| policy_entropy     | 0.14394486     |
| policy_loss        | -0.00043083838 |
| serial_timesteps   | 800000         |
| time_elapsed       | 857            |
| total_timesteps    | 800000         |
| value_loss         | 0.6309489      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0001823953  |
| clipfrac           | 0.001484375   |
| explained_variance | 0.957         |
| fps                | 1010          |
| n_updates          | 251           |
| policy_entropy     | 0.09232347    |
| policy_loss        | -0.0002509827 |
| serial_timesteps   | 803200        |
| time_elapsed       | 860           |
| total_timesteps    | 803200        |
| value_loss         | 5.363184      |
------------

--------------------------------------
| approxkl           | 8.990407e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.937         |
| fps                | 949           |
| n_updates          | 267           |
| policy_entropy     | 0.08312312    |
| policy_loss        | 3.2859592e-05 |
| serial_timesteps   | 854400        |
| time_elapsed       | 915           |
| total_timesteps    | 854400        |
| value_loss         | 7.616529      |
--------------------------------------
--------------------------------------
| approxkl           | 6.6882712e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.973         |
| fps                | 910           |
| n_updates          | 268           |
| policy_entropy     | 0.13701507    |
| policy_loss        | 2.104789e-07  |
| serial_timesteps   | 857600        |
| time_elapsed       | 919           |
| total_timesteps    | 857600        |
| value_loss         | 2.8668444     |
-------------------------

--------------------------------------
| approxkl           | 0.00014730018 |
| clipfrac           | 0.000859375   |
| explained_variance | 0.97          |
| fps                | 930           |
| n_updates          | 284           |
| policy_entropy     | 0.12181933    |
| policy_loss        | -1.027517e-05 |
| serial_timesteps   | 908800        |
| time_elapsed       | 972           |
| total_timesteps    | 908800        |
| value_loss         | 3.502334      |
--------------------------------------
---------------------------------------
| approxkl           | 0.00013432288  |
| clipfrac           | 0.00015625     |
| explained_variance | 0.951          |
| fps                | 899            |
| n_updates          | 285            |
| policy_entropy     | 0.18345064     |
| policy_loss        | -3.4914908e-06 |
| serial_timesteps   | 912000         |
| time_elapsed       | 975            |
| total_timesteps    | 912000         |
| value_loss         | 2.032571       |
-------------

--------------------------------------
| approxkl           | 2.2307328e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.966         |
| fps                | 992           |
| n_updates          | 300           |
| policy_entropy     | 0.10937994    |
| policy_loss        | 7.4576583e-06 |
| serial_timesteps   | 960000        |
| time_elapsed       | 1.03e+03      |
| total_timesteps    | 960000        |
| value_loss         | 5.2473254     |
--------------------------------------
--------------------------------------
| approxkl           | 1.2315557e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.97          |
| fps                | 999           |
| n_updates          | 301           |
| policy_entropy     | 0.14435133    |
| policy_loss        | 7.592812e-06  |
| serial_timesteps   | 963200        |
| time_elapsed       | 1.03e+03      |
| total_timesteps    | 963200        |
| value_loss         | 2.6956577     |
-------------------------

--------------------------------------
| approxkl           | 2.1007694e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.961         |
| fps                | 952           |
| n_updates          | 317           |
| policy_entropy     | 0.13639435    |
| policy_loss        | 2.7280487e-06 |
| serial_timesteps   | 1014400       |
| time_elapsed       | 1.08e+03      |
| total_timesteps    | 1014400       |
| value_loss         | 4.6339874     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00062293245 |
| clipfrac           | 0.006484375   |
| explained_variance | -5.68         |
| fps                | 930           |
| n_updates          | 318           |
| policy_entropy     | 0.18623182    |
| policy_loss        | -0.0008417425 |
| serial_timesteps   | 1017600       |
| time_elapsed       | 1.09e+03      |
| total_timesteps    | 1017600       |
| value_loss         | 0.41450417    |
-------------------------

---------------------------------------
| approxkl           | 2.1925547e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.946          |
| fps                | 995            |
| n_updates          | 334            |
| policy_entropy     | 0.1631434      |
| policy_loss        | -8.9334324e-05 |
| serial_timesteps   | 1068800        |
| time_elapsed       | 1.14e+03       |
| total_timesteps    | 1068800        |
| value_loss         | 7.808558       |
---------------------------------------
--------------------------------------
| approxkl           | 1.7224265e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.961         |
| fps                | 1013          |
| n_updates          | 335           |
| policy_entropy     | 0.15401274    |
| policy_loss        | -9.852972e-05 |
| serial_timesteps   | 1072000       |
| time_elapsed       | 1.14e+03      |
| total_timesteps    | 1072000       |
| value_loss         | 2.905371      |
------------

--------------------------------------
| approxkl           | 8.391438e-05  |
| clipfrac           | 0.00015625    |
| explained_variance | 0.956         |
| fps                | 989           |
| n_updates          | 351           |
| policy_entropy     | 0.16403578    |
| policy_loss        | -6.759584e-05 |
| serial_timesteps   | 1123200       |
| time_elapsed       | 1.2e+03       |
| total_timesteps    | 1123200       |
| value_loss         | 3.1321163     |
--------------------------------------
---------------------------------------
| approxkl           | 4.7083326e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.968          |
| fps                | 985            |
| n_updates          | 352            |
| policy_entropy     | 0.17135727     |
| policy_loss        | -5.1004143e-05 |
| serial_timesteps   | 1126400        |
| time_elapsed       | 1.2e+03        |
| total_timesteps    | 1126400        |
| value_loss         | 2.739884       |
-------------

---------------------------------------
| approxkl           | 0.00019527717  |
| clipfrac           | 0.0018750001   |
| explained_variance | 0.968          |
| fps                | 925            |
| n_updates          | 367            |
| policy_entropy     | 0.13486482     |
| policy_loss        | -1.5013851e-05 |
| serial_timesteps   | 1174400        |
| time_elapsed       | 1.25e+03       |
| total_timesteps    | 1174400        |
| value_loss         | 2.8414564      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00022794743 |
| clipfrac           | 0.00234375    |
| explained_variance | -5.86         |
| fps                | 921           |
| n_updates          | 368           |
| policy_entropy     | 0.20477921    |
| policy_loss        | -0.0003947321 |
| serial_timesteps   | 1177600       |
| time_elapsed       | 1.25e+03      |
| total_timesteps    | 1177600       |
| value_loss         | 0.8677306     |
------------

-------------------------------------
| approxkl           | 3.36952e-05  |
| clipfrac           | 0.0          |
| explained_variance | 0.946        |
| fps                | 992          |
| n_updates          | 384          |
| policy_entropy     | 0.092559025  |
| policy_loss        | 9.922848e-05 |
| serial_timesteps   | 1228800      |
| time_elapsed       | 1.3e+03      |
| total_timesteps    | 1228800      |
| value_loss         | 7.284109     |
-------------------------------------
---------------------------------------
| approxkl           | 1.8592333e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.955          |
| fps                | 1002           |
| n_updates          | 385            |
| policy_entropy     | 0.1050067      |
| policy_loss        | -2.0820871e-05 |
| serial_timesteps   | 1232000        |
| time_elapsed       | 1.31e+03       |
| total_timesteps    | 1232000        |
| value_loss         | 5.696284       |
--------------------------

---------------------------------------
| approxkl           | 2.373492e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.953          |
| fps                | 954            |
| n_updates          | 401            |
| policy_entropy     | 0.10986109     |
| policy_loss        | -2.2000074e-06 |
| serial_timesteps   | 1283200        |
| time_elapsed       | 1.36e+03       |
| total_timesteps    | 1283200        |
| value_loss         | 7.0894456      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00018570625 |
| clipfrac           | 0.001328125   |
| explained_variance | -10.3         |
| fps                | 873           |
| n_updates          | 402           |
| policy_entropy     | 0.24447012    |
| policy_loss        | -0.0003916975 |
| serial_timesteps   | 1286400       |
| time_elapsed       | 1.36e+03      |
| total_timesteps    | 1286400       |
| value_loss         | 0.46965846    |
------------

---------------------------------------
| approxkl           | 0.00017845366  |
| clipfrac           | 0.001328125    |
| explained_variance | 0.971          |
| fps                | 955            |
| n_updates          | 418            |
| policy_entropy     | 0.1667309      |
| policy_loss        | -0.00010720767 |
| serial_timesteps   | 1337600        |
| time_elapsed       | 1.42e+03       |
| total_timesteps    | 1337600        |
| value_loss         | 2.8079498      |
---------------------------------------
--------------------------------------
| approxkl           | 7.666444e-05  |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.96          |
| fps                | 1011          |
| n_updates          | 419           |
| policy_entropy     | 0.10887018    |
| policy_loss        | 3.1701737e-05 |
| serial_timesteps   | 1340800       |
| time_elapsed       | 1.42e+03      |
| total_timesteps    | 1340800       |
| value_loss         | 5.7141027     |
------------

--------------------------------------
| approxkl           | 5.599851e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.957         |
| fps                | 1041          |
| n_updates          | 435           |
| policy_entropy     | 0.19644701    |
| policy_loss        | -1.831144e-05 |
| serial_timesteps   | 1392000       |
| time_elapsed       | 1.47e+03      |
| total_timesteps    | 1392000       |
| value_loss         | 2.5986004     |
--------------------------------------
--------------------------------------
| approxkl           | 4.226648e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.978         |
| fps                | 1000          |
| n_updates          | 436           |
| policy_entropy     | 0.17099127    |
| policy_loss        | -9.921298e-05 |
| serial_timesteps   | 1395200       |
| time_elapsed       | 1.48e+03      |
| total_timesteps    | 1395200       |
| value_loss         | 2.7134013     |
-------------------------

-------------------------------------
| approxkl           | 9.08258e-06  |
| clipfrac           | 0.0          |
| explained_variance | 0.969        |
| fps                | 1029         |
| n_updates          | 451          |
| policy_entropy     | 0.10701206   |
| policy_loss        | 5.937517e-06 |
| serial_timesteps   | 1443200      |
| time_elapsed       | 1.53e+03     |
| total_timesteps    | 1443200      |
| value_loss         | 3.813035     |
-------------------------------------
---------------------------------------
| approxkl           | 5.827199e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.959          |
| fps                | 1024           |
| n_updates          | 452            |
| policy_entropy     | 0.14561795     |
| policy_loss        | -1.5872865e-05 |
| serial_timesteps   | 1446400        |
| time_elapsed       | 1.53e+03       |
| total_timesteps    | 1446400        |
| value_loss         | 4.166711       |
--------------------------

--------------------------------------
| approxkl           | 1.2265786e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.975         |
| fps                | 968           |
| n_updates          | 468           |
| policy_entropy     | 0.10207158    |
| policy_loss        | -3.591977e-05 |
| serial_timesteps   | 1497600       |
| time_elapsed       | 1.58e+03      |
| total_timesteps    | 1497600       |
| value_loss         | 4.556847      |
--------------------------------------
--------------------------------------
| approxkl           | 1.2485745e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.954         |
| fps                | 984           |
| n_updates          | 469           |
| policy_entropy     | 0.09243132    |
| policy_loss        | -1.792744e-05 |
| serial_timesteps   | 1500800       |
| time_elapsed       | 1.58e+03      |
| total_timesteps    | 1500800       |
| value_loss         | 6.258991      |
-------------------------

--------------------------------------
| approxkl           | 0.00037893484 |
| clipfrac           | 0.003671875   |
| explained_variance | -3.52         |
| fps                | 932           |
| n_updates          | 484           |
| policy_entropy     | 0.18802972    |
| policy_loss        | -0.0007228262 |
| serial_timesteps   | 1548800       |
| time_elapsed       | 1.63e+03      |
| total_timesteps    | 1548800       |
| value_loss         | 0.34607676    |
--------------------------------------
---------------------------------------
| approxkl           | 0.00088717055  |
| clipfrac           | 0.010859376    |
| explained_variance | 0.863          |
| fps                | 933            |
| n_updates          | 485            |
| policy_entropy     | 0.16830343     |
| policy_loss        | -0.00020320929 |
| serial_timesteps   | 1552000        |
| time_elapsed       | 1.63e+03       |
| total_timesteps    | 1552000        |
| value_loss         | 0.5733655      |
-------------

--------------------------------------
| approxkl           | 0.00049008883 |
| clipfrac           | 0.007109375   |
| explained_variance | 0.962         |
| fps                | 960           |
| n_updates          | 501           |
| policy_entropy     | 0.22457707    |
| policy_loss        | -0.0009133365 |
| serial_timesteps   | 1603200       |
| time_elapsed       | 1.69e+03      |
| total_timesteps    | 1603200       |
| value_loss         | 5.2965884     |
--------------------------------------
---------------------------------------
| approxkl           | 7.2659735e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.941          |
| fps                | 903            |
| n_updates          | 502            |
| policy_entropy     | 0.14296499     |
| policy_loss        | -0.00012519829 |
| serial_timesteps   | 1606400        |
| time_elapsed       | 1.69e+03       |
| total_timesteps    | 1606400        |
| value_loss         | 4.8837013      |
-------------

--------------------------------------
| approxkl           | 0.012933381   |
| clipfrac           | 0.08164062    |
| explained_variance | -0.0102       |
| fps                | 973           |
| n_updates          | 518           |
| policy_entropy     | 0.22311407    |
| policy_loss        | -6.885454e-05 |
| serial_timesteps   | 1657600       |
| time_elapsed       | 1.75e+03      |
| total_timesteps    | 1657600       |
| value_loss         | 0.0343385     |
--------------------------------------
-------------------------------------
| approxkl           | 0.0022502437 |
| clipfrac           | 0.018671874  |
| explained_variance | 0.952        |
| fps                | 1025         |
| n_updates          | 519          |
| policy_entropy     | 0.1828751    |
| policy_loss        | 0.001123002  |
| serial_timesteps   | 1660800      |
| time_elapsed       | 1.75e+03     |
| total_timesteps    | 1660800      |
| value_loss         | 5.3393526    |
-------------------------------------

--------------------------------------
| approxkl           | 4.1250336e-05 |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.919         |
| fps                | 1045          |
| n_updates          | 535           |
| policy_entropy     | 0.11710358    |
| policy_loss        | -9.301849e-05 |
| serial_timesteps   | 1712000       |
| time_elapsed       | 1.8e+03       |
| total_timesteps    | 1712000       |
| value_loss         | 6.453347      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00038895092 |
| clipfrac           | 0.008828125   |
| explained_variance | 0.974         |
| fps                | 934           |
| n_updates          | 536           |
| policy_entropy     | 0.22291307    |
| policy_loss        | -0.0005491959 |
| serial_timesteps   | 1715200       |
| time_elapsed       | 1.8e+03       |
| total_timesteps    | 1715200       |
| value_loss         | 2.0618398     |
-------------------------

--------------------------------------
| approxkl           | 0.00025527677 |
| clipfrac           | 0.0021875     |
| explained_variance | 0.945         |
| fps                | 943           |
| n_updates          | 552           |
| policy_entropy     | 0.18871447    |
| policy_loss        | 1.6073715e-05 |
| serial_timesteps   | 1766400       |
| time_elapsed       | 1.86e+03      |
| total_timesteps    | 1766400       |
| value_loss         | 7.4153304     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00021847454 |
| clipfrac           | 0.002265625   |
| explained_variance | 0.933         |
| fps                | 949           |
| n_updates          | 553           |
| policy_entropy     | 0.08834189    |
| policy_loss        | -7.474281e-05 |
| serial_timesteps   | 1769600       |
| time_elapsed       | 1.86e+03      |
| total_timesteps    | 1769600       |
| value_loss         | 6.9388533     |
-------------------------

---------------------------------------
| approxkl           | 9.659176e-05   |
| clipfrac           | 0.0003125      |
| explained_variance | 0.977          |
| fps                | 948            |
| n_updates          | 568            |
| policy_entropy     | 0.122979164    |
| policy_loss        | -0.00024962335 |
| serial_timesteps   | 1817600        |
| time_elapsed       | 1.91e+03       |
| total_timesteps    | 1817600        |
| value_loss         | 2.5873075      |
---------------------------------------
---------------------------------------
| approxkl           | 0.00010891972  |
| clipfrac           | 0.00078125     |
| explained_variance | 0.983          |
| fps                | 1007           |
| n_updates          | 569            |
| policy_entropy     | 0.13488308     |
| policy_loss        | -0.00014581799 |
| serial_timesteps   | 1820800        |
| time_elapsed       | 1.91e+03       |
| total_timesteps    | 1820800        |
| value_loss         | 1.4904988      |


---------------------------------------
| approxkl           | 4.5858997e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.97           |
| fps                | 913            |
| n_updates          | 585            |
| policy_entropy     | 0.09769947     |
| policy_loss        | -3.0061678e-05 |
| serial_timesteps   | 1872000        |
| time_elapsed       | 1.97e+03       |
| total_timesteps    | 1872000        |
| value_loss         | 3.2231498      |
---------------------------------------
---------------------------------------
| approxkl           | 8.94673e-06    |
| clipfrac           | 0.0            |
| explained_variance | 0.96           |
| fps                | 894            |
| n_updates          | 586            |
| policy_entropy     | 0.14217114     |
| policy_loss        | -1.0825619e-05 |
| serial_timesteps   | 1875200        |
| time_elapsed       | 1.97e+03       |
| total_timesteps    | 1875200        |
| value_loss         | 2.3340883      |


---------------------------------------
| approxkl           | 0.00014414512  |
| clipfrac           | 0.0003125      |
| explained_variance | 0.987          |
| fps                | 969            |
| n_updates          | 601            |
| policy_entropy     | 0.16344929     |
| policy_loss        | -0.00028728988 |
| serial_timesteps   | 1923200        |
| time_elapsed       | 2.02e+03       |
| total_timesteps    | 1923200        |
| value_loss         | 0.3630744      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00026045553 |
| clipfrac           | 0.0028124999  |
| explained_variance | 0.936         |
| fps                | 1010          |
| n_updates          | 602           |
| policy_entropy     | 0.09105849    |
| policy_loss        | 0.00027104968 |
| serial_timesteps   | 1926400       |
| time_elapsed       | 2.02e+03      |
| total_timesteps    | 1926400       |
| value_loss         | 7.713654      |
------------

---------------------------------------
| approxkl           | 0.00031000812  |
| clipfrac           | 0.0034375      |
| explained_variance | 0.955          |
| fps                | 1063           |
| n_updates          | 618            |
| policy_entropy     | 0.11638996     |
| policy_loss        | -0.00018829193 |
| serial_timesteps   | 1977600        |
| time_elapsed       | 2.08e+03       |
| total_timesteps    | 1977600        |
| value_loss         | 5.361906       |
---------------------------------------
---------------------------------------
| approxkl           | 0.00017644512  |
| clipfrac           | 0.000859375    |
| explained_variance | 0.961          |
| fps                | 1041           |
| n_updates          | 619            |
| policy_entropy     | 0.1523617      |
| policy_loss        | -1.0666997e-05 |
| serial_timesteps   | 1980800        |
| time_elapsed       | 2.08e+03       |
| total_timesteps    | 1980800        |
| value_loss         | 2.8801973      |


---------------------------------------
| approxkl           | 0.00018082268  |
| clipfrac           | 0.0016406251   |
| explained_variance | 0.963          |
| fps                | 986            |
| n_updates          | 635            |
| policy_entropy     | 0.12210344     |
| policy_loss        | -3.9023234e-05 |
| serial_timesteps   | 2032000        |
| time_elapsed       | 2.13e+03       |
| total_timesteps    | 2032000        |
| value_loss         | 5.120031       |
---------------------------------------
--------------------------------------
| approxkl           | 0.0009785306  |
| clipfrac           | 0.0103124995  |
| explained_variance | 0.986         |
| fps                | 942           |
| n_updates          | 636           |
| policy_entropy     | 0.18591805    |
| policy_loss        | -0.0005174045 |
| serial_timesteps   | 2035200       |
| time_elapsed       | 2.13e+03      |
| total_timesteps    | 2035200       |
| value_loss         | 0.31232387    |
------------

---------------------------------------
| approxkl           | 0.00028809634  |
| clipfrac           | 0.0021093749   |
| explained_variance | 0.987          |
| fps                | 949            |
| n_updates          | 651            |
| policy_entropy     | 0.18017884     |
| policy_loss        | -0.00015879425 |
| serial_timesteps   | 2083200        |
| time_elapsed       | 2.18e+03       |
| total_timesteps    | 2083200        |
| value_loss         | 0.4455556      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00015772383 |
| clipfrac           | 0.00171875    |
| explained_variance | 0.934         |
| fps                | 961           |
| n_updates          | 652           |
| policy_entropy     | 0.07737909    |
| policy_loss        | 7.165111e-06  |
| serial_timesteps   | 2086400       |
| time_elapsed       | 2.19e+03      |
| total_timesteps    | 2086400       |
| value_loss         | 7.7832274     |
------------

--------------------------------------
| approxkl           | 6.2747844e-05 |
| clipfrac           | 0.000234375   |
| explained_variance | 0.912         |
| fps                | 1014          |
| n_updates          | 667           |
| policy_entropy     | 0.12858184    |
| policy_loss        | -9.783208e-05 |
| serial_timesteps   | 2134400       |
| time_elapsed       | 2.24e+03      |
| total_timesteps    | 2134400       |
| value_loss         | 8.222091      |
--------------------------------------
---------------------------------------
| approxkl           | 2.0069128e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.966          |
| fps                | 1006           |
| n_updates          | 668            |
| policy_entropy     | 0.12400374     |
| policy_loss        | -5.3265467e-05 |
| serial_timesteps   | 2137600        |
| time_elapsed       | 2.24e+03       |
| total_timesteps    | 2137600        |
| value_loss         | 3.8214037      |
-------------

---------------------------------------
| approxkl           | 1.56172e-05    |
| clipfrac           | 0.0            |
| explained_variance | 0.988          |
| fps                | 983            |
| n_updates          | 684            |
| policy_entropy     | 0.18362802     |
| policy_loss        | -5.7937206e-05 |
| serial_timesteps   | 2188800        |
| time_elapsed       | 2.29e+03       |
| total_timesteps    | 2188800        |
| value_loss         | 0.45190924     |
---------------------------------------
---------------------------------------
| approxkl           | 1.2096088e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.881          |
| fps                | 1010           |
| n_updates          | 685            |
| policy_entropy     | 0.07480211     |
| policy_loss        | -0.00012238533 |
| serial_timesteps   | 2192000        |
| time_elapsed       | 2.3e+03        |
| total_timesteps    | 2192000        |
| value_loss         | 10.611236      |


---------------------------------------
| approxkl           | 8.218777e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.969          |
| fps                | 1026           |
| n_updates          | 701            |
| policy_entropy     | 0.14586294     |
| policy_loss        | -1.9562096e-05 |
| serial_timesteps   | 2243200        |
| time_elapsed       | 2.35e+03       |
| total_timesteps    | 2243200        |
| value_loss         | 3.7178233      |
---------------------------------------
--------------------------------------
| approxkl           | 4.0783307e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.962         |
| fps                | 1017          |
| n_updates          | 702           |
| policy_entropy     | 0.15503114    |
| policy_loss        | 1.7423481e-05 |
| serial_timesteps   | 2246400       |
| time_elapsed       | 2.35e+03      |
| total_timesteps    | 2246400       |
| value_loss         | 3.7321186     |
------------

--------------------------------------
| approxkl           | 2.2892286e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.955         |
| fps                | 1020          |
| n_updates          | 718           |
| policy_entropy     | 0.10231442    |
| policy_loss        | 3.2752746e-07 |
| serial_timesteps   | 2297600       |
| time_elapsed       | 2.41e+03      |
| total_timesteps    | 2297600       |
| value_loss         | 5.456341      |
--------------------------------------
--------------------------------------
| approxkl           | 9.4947234e-07 |
| clipfrac           | 0.0           |
| explained_variance | 0.948         |
| fps                | 1034          |
| n_updates          | 719           |
| policy_entropy     | 0.08035226    |
| policy_loss        | 3.6504864e-06 |
| serial_timesteps   | 2300800       |
| time_elapsed       | 2.41e+03      |
| total_timesteps    | 2300800       |
| value_loss         | 5.655645      |
-------------------------

--------------------------------------
| approxkl           | 0.00070162304 |
| clipfrac           | 0.00828125    |
| explained_variance | 0.949         |
| fps                | 944           |
| n_updates          | 735           |
| policy_entropy     | 0.14661755    |
| policy_loss        | 0.00028502772 |
| serial_timesteps   | 2352000       |
| time_elapsed       | 2.46e+03      |
| total_timesteps    | 2352000       |
| value_loss         | 3.2179303     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00023508572 |
| clipfrac           | 0.0021093749  |
| explained_variance | 0.967         |
| fps                | 955           |
| n_updates          | 736           |
| policy_entropy     | 0.1326056     |
| policy_loss        | 7.1456284e-06 |
| serial_timesteps   | 2355200       |
| time_elapsed       | 2.46e+03      |
| total_timesteps    | 2355200       |
| value_loss         | 2.7208967     |
-------------------------

--------------------------------------
| approxkl           | 0.0009585878  |
| clipfrac           | 0.0128125     |
| explained_variance | 0.692         |
| fps                | 995           |
| n_updates          | 752           |
| policy_entropy     | 0.19007424    |
| policy_loss        | -0.0010973195 |
| serial_timesteps   | 2406400       |
| time_elapsed       | 2.52e+03      |
| total_timesteps    | 2406400       |
| value_loss         | 0.124591656   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0013134775  |
| clipfrac           | 0.014453124   |
| explained_variance | 0.976         |
| fps                | 992           |
| n_updates          | 753           |
| policy_entropy     | 0.18597837    |
| policy_loss        | -0.0008818579 |
| serial_timesteps   | 2409600       |
| time_elapsed       | 2.52e+03      |
| total_timesteps    | 2409600       |
| value_loss         | 0.21205072    |
-------------------------

---------------------------------------
| approxkl           | 1.9143424e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.97           |
| fps                | 923            |
| n_updates          | 769            |
| policy_entropy     | 0.16275297     |
| policy_loss        | -2.1471977e-05 |
| serial_timesteps   | 2460800        |
| time_elapsed       | 2.57e+03       |
| total_timesteps    | 2460800        |
| value_loss         | 2.289956       |
---------------------------------------
---------------------------------------
| approxkl           | 7.222966e-05   |
| clipfrac           | 0.000234375    |
| explained_variance | 0.971          |
| fps                | 910            |
| n_updates          | 770            |
| policy_entropy     | 0.19745125     |
| policy_loss        | -0.00034688314 |
| serial_timesteps   | 2464000        |
| time_elapsed       | 2.58e+03       |
| total_timesteps    | 2464000        |
| value_loss         | 0.6808505      |


--------------------------------------
| approxkl           | 0.0028459646  |
| clipfrac           | 0.029531252   |
| explained_variance | -0.444        |
| fps                | 959           |
| n_updates          | 786           |
| policy_entropy     | 0.19578426    |
| policy_loss        | -0.0025404864 |
| serial_timesteps   | 2515200       |
| time_elapsed       | 2.63e+03      |
| total_timesteps    | 2515200       |
| value_loss         | 0.033919454   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0010873466 |
| clipfrac           | 0.013125     |
| explained_variance | 0.973        |
| fps                | 1033         |
| n_updates          | 787          |
| policy_entropy     | 0.15167977   |
| policy_loss        | 8.140341e-05 |
| serial_timesteps   | 2518400      |
| time_elapsed       | 2.63e+03     |
| total_timesteps    | 2518400      |
| value_loss         | 2.585603     |
-------------------------------------

--------------------------------------
| approxkl           | 5.3693275e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.936         |
| fps                | 960           |
| n_updates          | 803           |
| policy_entropy     | 0.20426878    |
| policy_loss        | -7.532425e-05 |
| serial_timesteps   | 2569600       |
| time_elapsed       | 2.68e+03      |
| total_timesteps    | 2569600       |
| value_loss         | 1.6850078     |
--------------------------------------
--------------------------------------
| approxkl           | 3.102845e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.965         |
| fps                | 1016          |
| n_updates          | 804           |
| policy_entropy     | 0.14290155    |
| policy_loss        | 3.6292822e-05 |
| serial_timesteps   | 2572800       |
| time_elapsed       | 2.69e+03      |
| total_timesteps    | 2572800       |
| value_loss         | 4.083418      |
-------------------------

---------------------------------------
| approxkl           | 7.459165e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.953          |
| fps                | 956            |
| n_updates          | 819            |
| policy_entropy     | 0.18600705     |
| policy_loss        | -0.00019278139 |
| serial_timesteps   | 2620800        |
| time_elapsed       | 2.74e+03       |
| total_timesteps    | 2620800        |
| value_loss         | 7.208381       |
---------------------------------------
--------------------------------------
| approxkl           | 0.0005209925  |
| clipfrac           | 0.00640625    |
| explained_variance | -3.73         |
| fps                | 940           |
| n_updates          | 820           |
| policy_entropy     | 0.21608332    |
| policy_loss        | -0.0005547191 |
| serial_timesteps   | 2624000       |
| time_elapsed       | 2.74e+03      |
| total_timesteps    | 2624000       |
| value_loss         | 0.38662744    |
------------

-------------------------------------
| approxkl           | 0.0014732343 |
| clipfrac           | 0.01765625   |
| explained_variance | 0.962        |
| fps                | 1008         |
| n_updates          | 836          |
| policy_entropy     | 0.27272844   |
| policy_loss        | 0.0022283958 |
| serial_timesteps   | 2675200      |
| time_elapsed       | 2.79e+03     |
| total_timesteps    | 2675200      |
| value_loss         | 5.2924695    |
-------------------------------------
--------------------------------------
| approxkl           | 0.00021732652 |
| clipfrac           | 0.003125      |
| explained_variance | 0.976         |
| fps                | 918           |
| n_updates          | 837           |
| policy_entropy     | 0.20931233    |
| policy_loss        | 0.00024516298 |
| serial_timesteps   | 2678400       |
| time_elapsed       | 2.79e+03      |
| total_timesteps    | 2678400       |
| value_loss         | 2.228992      |
--------------------------------------

--------------------------------------
| approxkl           | 0.0004493785  |
| clipfrac           | 0.01046875    |
| explained_variance | 0.949         |
| fps                | 1050          |
| n_updates          | 852           |
| policy_entropy     | 0.20087568    |
| policy_loss        | 0.00034909789 |
| serial_timesteps   | 2726400       |
| time_elapsed       | 2.85e+03      |
| total_timesteps    | 2726400       |
| value_loss         | 6.3970046     |
--------------------------------------
---------------------------------------
| approxkl           | 6.5943124e-05  |
| clipfrac           | 7.8125e-05     |
| explained_variance | 0.973          |
| fps                | 1024           |
| n_updates          | 853            |
| policy_entropy     | 0.18849376     |
| policy_loss        | -2.8682873e-05 |
| serial_timesteps   | 2729600        |
| time_elapsed       | 2.85e+03       |
| total_timesteps    | 2729600        |
| value_loss         | 3.1142616      |
-------------

---------------------------------------
| approxkl           | 0.00027638164  |
| clipfrac           | 0.0021875      |
| explained_variance | 0.987          |
| fps                | 943            |
| n_updates          | 869            |
| policy_entropy     | 0.14732245     |
| policy_loss        | -0.00013394974 |
| serial_timesteps   | 2780800        |
| time_elapsed       | 2.9e+03        |
| total_timesteps    | 2780800        |
| value_loss         | 0.9623491      |
---------------------------------------
---------------------------------------
| approxkl           | 7.3638046e-05  |
| clipfrac           | 0.0003125      |
| explained_variance | 0.966          |
| fps                | 960            |
| n_updates          | 870            |
| policy_entropy     | 0.13903944     |
| policy_loss        | -0.00012002915 |
| serial_timesteps   | 2784000        |
| time_elapsed       | 2.9e+03        |
| total_timesteps    | 2784000        |
| value_loss         | 5.015436       |


---------------------------------------
| approxkl           | 2.9384673e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.958          |
| fps                | 886            |
| n_updates          | 885            |
| policy_entropy     | 0.10131945     |
| policy_loss        | -1.9924341e-06 |
| serial_timesteps   | 2832000        |
| time_elapsed       | 2.96e+03       |
| total_timesteps    | 2832000        |
| value_loss         | 5.3302636      |
---------------------------------------
---------------------------------------
| approxkl           | 5.2255155e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.953          |
| fps                | 849            |
| n_updates          | 886            |
| policy_entropy     | 0.16753858     |
| policy_loss        | -1.4983714e-05 |
| serial_timesteps   | 2835200        |
| time_elapsed       | 2.96e+03       |
| total_timesteps    | 2835200        |
| value_loss         | 2.1285224      |


--------------------------------------
| approxkl           | 2.8077227e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.957         |
| fps                | 864           |
| n_updates          | 901           |
| policy_entropy     | 0.11582206    |
| policy_loss        | 3.060527e-05  |
| serial_timesteps   | 2883200       |
| time_elapsed       | 3.02e+03      |
| total_timesteps    | 2883200       |
| value_loss         | 5.3366694     |
--------------------------------------
---------------------------------------
| approxkl           | 3.0302364e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.96           |
| fps                | 872            |
| n_updates          | 902            |
| policy_entropy     | 0.2029028      |
| policy_loss        | -0.00011006886 |
| serial_timesteps   | 2886400        |
| time_elapsed       | 3.02e+03       |
| total_timesteps    | 2886400        |
| value_loss         | 2.5861683      |
-------------

---------------------------------------
| approxkl           | 7.3047704e-05  |
| clipfrac           | 0.00015625     |
| explained_variance | 0.968          |
| fps                | 901            |
| n_updates          | 917            |
| policy_entropy     | 0.10617582     |
| policy_loss        | -0.00012603089 |
| serial_timesteps   | 2934400        |
| time_elapsed       | 3.08e+03       |
| total_timesteps    | 2934400        |
| value_loss         | 5.4432735      |
---------------------------------------
--------------------------------------
| approxkl           | 6.168803e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.966         |
| fps                | 903           |
| n_updates          | 918           |
| policy_entropy     | 0.16053587    |
| policy_loss        | -0.0001430074 |
| serial_timesteps   | 2937600       |
| time_elapsed       | 3.08e+03      |
| total_timesteps    | 2937600       |
| value_loss         | 2.7633832     |
------------

--------------------------------------
| approxkl           | 2.9991654e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.986         |
| fps                | 935           |
| n_updates          | 934           |
| policy_entropy     | 0.14820926    |
| policy_loss        | -7.03723e-05  |
| serial_timesteps   | 2988800       |
| time_elapsed       | 3.13e+03      |
| total_timesteps    | 2988800       |
| value_loss         | 1.2632017     |
--------------------------------------
--------------------------------------
| approxkl           | 1.5506255e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.946         |
| fps                | 971           |
| n_updates          | 935           |
| policy_entropy     | 0.092613555   |
| policy_loss        | 2.0547956e-05 |
| serial_timesteps   | 2992000       |
| time_elapsed       | 3.13e+03      |
| total_timesteps    | 2992000       |
| value_loss         | 6.514798      |
-------------------------

---------------------------------------
| approxkl           | 9.2643626e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.96           |
| fps                | 970            |
| n_updates          | 951            |
| policy_entropy     | 0.22228514     |
| policy_loss        | -0.00010843285 |
| serial_timesteps   | 3043200        |
| time_elapsed       | 3.19e+03       |
| total_timesteps    | 3043200        |
| value_loss         | 5.4277716      |
---------------------------------------
---------------------------------------
| approxkl           | 9.2564675e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.957          |
| fps                | 988            |
| n_updates          | 952            |
| policy_entropy     | 0.23764867     |
| policy_loss        | -1.9524097e-05 |
| serial_timesteps   | 3046400        |
| time_elapsed       | 3.19e+03       |
| total_timesteps    | 3046400        |
| value_loss         | 5.461005       |


---------------------------------------
| approxkl           | 0.00013816482  |
| clipfrac           | 0.000703125    |
| explained_variance | 0.978          |
| fps                | 975            |
| n_updates          | 967            |
| policy_entropy     | 0.19756502     |
| policy_loss        | -0.00013543078 |
| serial_timesteps   | 3094400        |
| time_elapsed       | 3.24e+03       |
| total_timesteps    | 3094400        |
| value_loss         | 2.8246667      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0001676045  |
| clipfrac           | 0.00015625    |
| explained_variance | 0.877         |
| fps                | 994           |
| n_updates          | 968           |
| policy_entropy     | 0.19236983    |
| policy_loss        | -8.392926e-06 |
| serial_timesteps   | 3097600       |
| time_elapsed       | 3.24e+03      |
| total_timesteps    | 3097600       |
| value_loss         | 4.413431      |
------------

--------------------------------------
| approxkl           | 1.59429e-05   |
| clipfrac           | 0.0           |
| explained_variance | 0.967         |
| fps                | 969           |
| n_updates          | 984           |
| policy_entropy     | 0.25610256    |
| policy_loss        | -3.426671e-05 |
| serial_timesteps   | 3148800       |
| time_elapsed       | 3.3e+03       |
| total_timesteps    | 3148800       |
| value_loss         | 2.7041876     |
--------------------------------------
---------------------------------------
| approxkl           | 0.0003075761   |
| clipfrac           | 0.00984375     |
| explained_variance | 0.975          |
| fps                | 982            |
| n_updates          | 985            |
| policy_entropy     | 0.25729162     |
| policy_loss        | -0.00043098556 |
| serial_timesteps   | 3152000        |
| time_elapsed       | 3.3e+03        |
| total_timesteps    | 3152000        |
| value_loss         | 2.6810796      |
-------------

---------------------------------------
| approxkl           | 0.000119852804 |
| clipfrac           | 0.00093750004  |
| explained_variance | 0.957          |
| fps                | 1027           |
| n_updates          | 1001           |
| policy_entropy     | 0.1340319      |
| policy_loss        | 5.9701204e-05  |
| serial_timesteps   | 3203200        |
| time_elapsed       | 3.35e+03       |
| total_timesteps    | 3203200        |
| value_loss         | 5.7835455      |
---------------------------------------
---------------------------------------
| approxkl           | 1.35363125e-05 |
| clipfrac           | 0.0            |
| explained_variance | 0.935          |
| fps                | 1036           |
| n_updates          | 1002           |
| policy_entropy     | 0.057096723    |
| policy_loss        | -3.8405506e-06 |
| serial_timesteps   | 3206400        |
| time_elapsed       | 3.36e+03       |
| total_timesteps    | 3206400        |
| value_loss         | 7.5220227      |


---------------------------------------
| approxkl           | 1.2863091e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.978          |
| fps                | 902            |
| n_updates          | 1017           |
| policy_entropy     | 0.1577408      |
| policy_loss        | -4.6380235e-05 |
| serial_timesteps   | 3254400        |
| time_elapsed       | 3.4e+03        |
| total_timesteps    | 3254400        |
| value_loss         | 2.7002628      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0011964905  |
| clipfrac           | 0.01234375    |
| explained_variance | -2.08         |
| fps                | 899           |
| n_updates          | 1018          |
| policy_entropy     | 0.2342763     |
| policy_loss        | -0.0006188342 |
| serial_timesteps   | 3257600       |
| time_elapsed       | 3.41e+03      |
| total_timesteps    | 3257600       |
| value_loss         | 0.3856174     |
------------

---------------------------------------
| approxkl           | 0.00016455169  |
| clipfrac           | 0.00109375     |
| explained_variance | 0.963          |
| fps                | 998            |
| n_updates          | 1033           |
| policy_entropy     | 0.120455496    |
| policy_loss        | -2.4767816e-05 |
| serial_timesteps   | 3305600        |
| time_elapsed       | 3.46e+03       |
| total_timesteps    | 3305600        |
| value_loss         | 5.241795       |
---------------------------------------
--------------------------------------
| approxkl           | 0.00010451286 |
| clipfrac           | 0.000625      |
| explained_variance | 0.965         |
| fps                | 945           |
| n_updates          | 1034          |
| policy_entropy     | 0.12401712    |
| policy_loss        | 1.9756852e-05 |
| serial_timesteps   | 3308800       |
| time_elapsed       | 3.46e+03      |
| total_timesteps    | 3308800       |
| value_loss         | 5.2264056     |
------------

-------------------------------------
| approxkl           | 6.975696e-05 |
| clipfrac           | 7.8125e-05   |
| explained_variance | 0.976        |
| fps                | 969          |
| n_updates          | 1050         |
| policy_entropy     | 0.104865596  |
| policy_loss        | 8.167922e-06 |
| serial_timesteps   | 3360000      |
| time_elapsed       | 3.51e+03     |
| total_timesteps    | 3360000      |
| value_loss         | 3.351924     |
-------------------------------------
--------------------------------------
| approxkl           | 9.254673e-05  |
| clipfrac           | 0.00015625    |
| explained_variance | 0.959         |
| fps                | 942           |
| n_updates          | 1051          |
| policy_entropy     | 0.15841305    |
| policy_loss        | -8.593188e-05 |
| serial_timesteps   | 3363200       |
| time_elapsed       | 3.52e+03      |
| total_timesteps    | 3363200       |
| value_loss         | 2.2797284     |
--------------------------------------

--------------------------------------
| approxkl           | 0.00017705208 |
| clipfrac           | 0.001875      |
| explained_variance | 0.971         |
| fps                | 1027          |
| n_updates          | 1067          |
| policy_entropy     | 0.118804894   |
| policy_loss        | 3.853284e-05  |
| serial_timesteps   | 3414400       |
| time_elapsed       | 3.57e+03      |
| total_timesteps    | 3414400       |
| value_loss         | 3.0374908     |
--------------------------------------
--------------------------------------
| approxkl           | 8.7037624e-05 |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.953         |
| fps                | 1020          |
| n_updates          | 1068          |
| policy_entropy     | 0.107937306   |
| policy_loss        | -9.627118e-06 |
| serial_timesteps   | 3417600       |
| time_elapsed       | 3.57e+03      |
| total_timesteps    | 3417600       |
| value_loss         | 5.708466      |
-------------------------

---------------------------------------
| approxkl           | 0.00090258266  |
| clipfrac           | 0.011484375    |
| explained_variance | -2.5           |
| fps                | 958            |
| n_updates          | 1083           |
| policy_entropy     | 0.18298693     |
| policy_loss        | -0.00066923245 |
| serial_timesteps   | 3465600        |
| time_elapsed       | 3.62e+03       |
| total_timesteps    | 3465600        |
| value_loss         | 0.14702603     |
---------------------------------------
---------------------------------------
| approxkl           | 0.0006211463   |
| clipfrac           | 0.0074218754   |
| explained_variance | 0.974          |
| fps                | 1025           |
| n_updates          | 1084           |
| policy_entropy     | 0.12661067     |
| policy_loss        | -2.8838589e-05 |
| serial_timesteps   | 3468800        |
| time_elapsed       | 3.63e+03       |
| total_timesteps    | 3468800        |
| value_loss         | 3.057461       |


-------------------------------------
| approxkl           | 0.0068405285 |
| clipfrac           | 0.023203123  |
| explained_variance | 0.952        |
| fps                | 1007         |
| n_updates          | 1100         |
| policy_entropy     | 0.07246049   |
| policy_loss        | 0.0012100742 |
| serial_timesteps   | 3520000      |
| time_elapsed       | 3.68e+03     |
| total_timesteps    | 3520000      |
| value_loss         | 5.850551     |
-------------------------------------
--------------------------------------
| approxkl           | 0.002531324   |
| clipfrac           | 0.0175        |
| explained_variance | 0.964         |
| fps                | 1029          |
| n_updates          | 1101          |
| policy_entropy     | 0.09075566    |
| policy_loss        | 0.00095246773 |
| serial_timesteps   | 3523200       |
| time_elapsed       | 3.68e+03      |
| total_timesteps    | 3523200       |
| value_loss         | 4.85937       |
--------------------------------------

---------------------------------------
| approxkl           | 0.0003515442   |
| clipfrac           | 0.00265625     |
| explained_variance | 0.415          |
| fps                | 1012           |
| n_updates          | 1116           |
| policy_entropy     | 0.16718459     |
| policy_loss        | -0.00033108544 |
| serial_timesteps   | 3571200        |
| time_elapsed       | 3.73e+03       |
| total_timesteps    | 3571200        |
| value_loss         | 1.136583       |
---------------------------------------
--------------------------------------
| approxkl           | 0.000147188   |
| clipfrac           | 0.00125       |
| explained_variance | 0.976         |
| fps                | 1010          |
| n_updates          | 1117          |
| policy_entropy     | 0.09744468    |
| policy_loss        | -6.290526e-07 |
| serial_timesteps   | 3574400       |
| time_elapsed       | 3.73e+03      |
| total_timesteps    | 3574400       |
| value_loss         | 3.4540482     |
------------

--------------------------------------
| approxkl           | 0.00024318727 |
| clipfrac           | 0.00171875    |
| explained_variance | -2.97         |
| fps                | 950           |
| n_updates          | 1133          |
| policy_entropy     | 0.1978749     |
| policy_loss        | -0.0005408961 |
| serial_timesteps   | 3625600       |
| time_elapsed       | 3.78e+03      |
| total_timesteps    | 3625600       |
| value_loss         | 0.38511172    |
--------------------------------------
---------------------------------------
| approxkl           | 0.00017999456  |
| clipfrac           | 0.0012500001   |
| explained_variance | 0.966          |
| fps                | 974            |
| n_updates          | 1134           |
| policy_entropy     | 0.097408146    |
| policy_loss        | -2.2421702e-05 |
| serial_timesteps   | 3628800        |
| time_elapsed       | 3.79e+03       |
| total_timesteps    | 3628800        |
| value_loss         | 5.2144775      |
-------------

--------------------------------------
| approxkl           | 6.990756e-05  |
| clipfrac           | 0.0003125     |
| explained_variance | 0.971         |
| fps                | 1020          |
| n_updates          | 1150          |
| policy_entropy     | 0.12154065    |
| policy_loss        | 1.7625243e-05 |
| serial_timesteps   | 3680000       |
| time_elapsed       | 3.84e+03      |
| total_timesteps    | 3680000       |
| value_loss         | 3.1921253     |
--------------------------------------
--------------------------------------
| approxkl           | 2.6594545e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.97          |
| fps                | 1020          |
| n_updates          | 1151          |
| policy_entropy     | 0.1379298     |
| policy_loss        | 3.768124e-05  |
| serial_timesteps   | 3683200       |
| time_elapsed       | 3.84e+03      |
| total_timesteps    | 3683200       |
| value_loss         | 4.855055      |
-------------------------

--------------------------------------
| approxkl           | 8.421192e-05  |
| clipfrac           | 0.00015625    |
| explained_variance | 0.972         |
| fps                | 966           |
| n_updates          | 1167          |
| policy_entropy     | 0.1557884     |
| policy_loss        | -9.801216e-05 |
| serial_timesteps   | 3734400       |
| time_elapsed       | 3.9e+03       |
| total_timesteps    | 3734400       |
| value_loss         | 2.588156      |
--------------------------------------
--------------------------------------
| approxkl           | 5.2386873e-05 |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.967         |
| fps                | 983           |
| n_updates          | 1168          |
| policy_entropy     | 0.13120356    |
| policy_loss        | 3.9741157e-05 |
| serial_timesteps   | 3737600       |
| time_elapsed       | 3.9e+03       |
| total_timesteps    | 3737600       |
| value_loss         | 4.874106      |
-------------------------

---------------------------------------
| approxkl           | 2.680619e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.949          |
| fps                | 995            |
| n_updates          | 1184           |
| policy_entropy     | 0.07403783     |
| policy_loss        | -5.1615236e-05 |
| serial_timesteps   | 3788800        |
| time_elapsed       | 3.95e+03       |
| total_timesteps    | 3788800        |
| value_loss         | 6.889436       |
---------------------------------------
--------------------------------------
| approxkl           | 1.5065569e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.961         |
| fps                | 1016          |
| n_updates          | 1185          |
| policy_entropy     | 0.112636395   |
| policy_loss        | 1.7601178e-05 |
| serial_timesteps   | 3792000       |
| time_elapsed       | 3.95e+03      |
| total_timesteps    | 3792000       |
| value_loss         | 5.8857956     |
------------

--------------------------------------
| approxkl           | 0.0004481469  |
| clipfrac           | 0.00453125    |
| explained_variance | 0.977         |
| fps                | 951           |
| n_updates          | 1200          |
| policy_entropy     | 0.17388245    |
| policy_loss        | 2.4209097e-05 |
| serial_timesteps   | 3840000       |
| time_elapsed       | 4e+03         |
| total_timesteps    | 3840000       |
| value_loss         | 3.0014453     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00019710799  |
| clipfrac           | 0.000859375    |
| explained_variance | 0.967          |
| fps                | 928            |
| n_updates          | 1201           |
| policy_entropy     | 0.1976789      |
| policy_loss        | -7.1172566e-05 |
| serial_timesteps   | 3843200        |
| time_elapsed       | 4.01e+03       |
| total_timesteps    | 3843200        |
| value_loss         | 5.3080397      |
-------------

--------------------------------------
| approxkl           | 2.3298662e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.974         |
| fps                | 914           |
| n_updates          | 1217          |
| policy_entropy     | 0.19817477    |
| policy_loss        | 2.6284604e-05 |
| serial_timesteps   | 3894400       |
| time_elapsed       | 4.06e+03      |
| total_timesteps    | 3894400       |
| value_loss         | 3.078179      |
--------------------------------------
---------------------------------------
| approxkl           | 2.5873576e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.98           |
| fps                | 902            |
| n_updates          | 1218           |
| policy_entropy     | 0.18802972     |
| policy_loss        | -0.00018692881 |
| serial_timesteps   | 3897600        |
| time_elapsed       | 4.06e+03       |
| total_timesteps    | 3897600        |
| value_loss         | 3.1545274      |
-------------

--------------------------------------
| approxkl           | 0.002311864   |
| clipfrac           | 0.023046875   |
| explained_variance | 0.991         |
| fps                | 1013          |
| n_updates          | 1234          |
| policy_entropy     | 0.17446283    |
| policy_loss        | 0.00017951685 |
| serial_timesteps   | 3948800       |
| time_elapsed       | 4.11e+03      |
| total_timesteps    | 3948800       |
| value_loss         | 0.91046625    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0007189907  |
| clipfrac           | 0.008828125   |
| explained_variance | 0.969         |
| fps                | 992           |
| n_updates          | 1235          |
| policy_entropy     | 0.16802174    |
| policy_loss        | -9.056926e-05 |
| serial_timesteps   | 3952000       |
| time_elapsed       | 4.12e+03      |
| total_timesteps    | 3952000       |
| value_loss         | 4.536161      |
-------------------------

--------------------------------------
| approxkl           | 0.002743925   |
| clipfrac           | 0.028828125   |
| explained_variance | 0.976         |
| fps                | 992           |
| n_updates          | 1251          |
| policy_entropy     | 0.22474903    |
| policy_loss        | -0.0005002547 |
| serial_timesteps   | 4003200       |
| time_elapsed       | 4.17e+03      |
| total_timesteps    | 4003200       |
| value_loss         | 0.54679966    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0013015519  |
| clipfrac           | 0.0153125     |
| explained_variance | 0.97          |
| fps                | 1026          |
| n_updates          | 1252          |
| policy_entropy     | 0.121583074   |
| policy_loss        | 0.00033841442 |
| serial_timesteps   | 4006400       |
| time_elapsed       | 4.17e+03      |
| total_timesteps    | 4006400       |
| value_loss         | 4.643093      |
-------------------------

--------------------------------------
| approxkl           | 5.7550795e-05 |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.933         |
| fps                | 1051          |
| n_updates          | 1268          |
| policy_entropy     | 0.13989195    |
| policy_loss        | 0.00016444072 |
| serial_timesteps   | 4057600       |
| time_elapsed       | 4.23e+03      |
| total_timesteps    | 4057600       |
| value_loss         | 8.845485      |
--------------------------------------
---------------------------------------
| approxkl           | 0.000103165425 |
| clipfrac           | 0.000546875    |
| explained_variance | 0.97           |
| fps                | 1053           |
| n_updates          | 1269           |
| policy_entropy     | 0.15229522     |
| policy_loss        | -0.00017041937 |
| serial_timesteps   | 4060800        |
| time_elapsed       | 4.23e+03       |
| total_timesteps    | 4060800        |
| value_loss         | 3.5582793      |
-------------

---------------------------------------
| approxkl           | 2.6035108e-05  |
| clipfrac           | 7.8125e-05     |
| explained_variance | 0.973          |
| fps                | 1050           |
| n_updates          | 1285           |
| policy_entropy     | 0.15495943     |
| policy_loss        | -8.4386615e-05 |
| serial_timesteps   | 4112000        |
| time_elapsed       | 4.28e+03       |
| total_timesteps    | 4112000        |
| value_loss         | 3.0679965      |
---------------------------------------
--------------------------------------
| approxkl           | 1.3972378e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.967         |
| fps                | 1069          |
| n_updates          | 1286          |
| policy_entropy     | 0.10283811    |
| policy_loss        | 5.130917e-06  |
| serial_timesteps   | 4115200       |
| time_elapsed       | 4.28e+03      |
| total_timesteps    | 4115200       |
| value_loss         | 5.572083      |
------------

---------------------------------------
| approxkl           | 8.972458e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.976          |
| fps                | 991            |
| n_updates          | 1301           |
| policy_entropy     | 0.13343324     |
| policy_loss        | -1.1721253e-05 |
| serial_timesteps   | 4163200        |
| time_elapsed       | 4.33e+03       |
| total_timesteps    | 4163200        |
| value_loss         | 3.4232357      |
---------------------------------------
-------------------------------------
| approxkl           | 3.840966e-05 |
| clipfrac           | 0.000234375  |
| explained_variance | 0.922        |
| fps                | 1017         |
| n_updates          | 1302         |
| policy_entropy     | 0.15115295   |
| policy_loss        | 5.989075e-05 |
| serial_timesteps   | 4166400      |
| time_elapsed       | 4.34e+03     |
| total_timesteps    | 4166400      |
| value_loss         | 5.3227143    |
------------------------

--------------------------------------
| approxkl           | 2.7417122e-05 |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.923         |
| fps                | 1029          |
| n_updates          | 1318          |
| policy_entropy     | 0.1004198     |
| policy_loss        | -1.745142e-05 |
| serial_timesteps   | 4217600       |
| time_elapsed       | 4.39e+03      |
| total_timesteps    | 4217600       |
| value_loss         | 8.495124      |
--------------------------------------
--------------------------------------
| approxkl           | 3.9170238e-05 |
| clipfrac           | 0.000234375   |
| explained_variance | 0.893         |
| fps                | 1024          |
| n_updates          | 1319          |
| policy_entropy     | 0.1187094     |
| policy_loss        | 0.00012422315 |
| serial_timesteps   | 4220800       |
| time_elapsed       | 4.39e+03      |
| total_timesteps    | 4220800       |
| value_loss         | 8.769196      |
-------------------------

---------------------------------------
| approxkl           | 9.434784e-05   |
| clipfrac           | 0.000234375    |
| explained_variance | 0.909          |
| fps                | 1013           |
| n_updates          | 1335           |
| policy_entropy     | 0.19981351     |
| policy_loss        | -1.1382846e-05 |
| serial_timesteps   | 4272000        |
| time_elapsed       | 4.44e+03       |
| total_timesteps    | 4272000        |
| value_loss         | 6.3390164      |
---------------------------------------
--------------------------------------
| approxkl           | 6.076846e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.928         |
| fps                | 1061          |
| n_updates          | 1336          |
| policy_entropy     | 0.16264774    |
| policy_loss        | -0.0001439958 |
| serial_timesteps   | 4275200       |
| time_elapsed       | 4.44e+03      |
| total_timesteps    | 4275200       |
| value_loss         | 5.2444143     |
------------

---------------------------------------
| approxkl           | 8.991372e-05   |
| clipfrac           | 0.000546875    |
| explained_variance | 0.965          |
| fps                | 998            |
| n_updates          | 1351           |
| policy_entropy     | 0.11642779     |
| policy_loss        | -2.4927034e-05 |
| serial_timesteps   | 4323200        |
| time_elapsed       | 4.49e+03       |
| total_timesteps    | 4323200        |
| value_loss         | 5.366314       |
---------------------------------------
--------------------------------------
| approxkl           | 4.074897e-05  |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.942         |
| fps                | 1017          |
| n_updates          | 1352          |
| policy_entropy     | 0.12224788    |
| policy_loss        | -2.323672e-05 |
| serial_timesteps   | 4326400       |
| time_elapsed       | 4.49e+03      |
| total_timesteps    | 4326400       |
| value_loss         | 5.216187      |
------------

---------------------------------------
| approxkl           | 3.2396798e-05  |
| clipfrac           | 0.00015625     |
| explained_variance | 0.931          |
| fps                | 1058           |
| n_updates          | 1367           |
| policy_entropy     | 0.07948023     |
| policy_loss        | -8.8129644e-05 |
| serial_timesteps   | 4374400        |
| time_elapsed       | 4.54e+03       |
| total_timesteps    | 4374400        |
| value_loss         | 9.672561       |
---------------------------------------
---------------------------------------
| approxkl           | 0.00015527301  |
| clipfrac           | 0.000234375    |
| explained_variance | 0.874          |
| fps                | 1030           |
| n_updates          | 1368           |
| policy_entropy     | 0.18042845     |
| policy_loss        | -3.7696696e-05 |
| serial_timesteps   | 4377600        |
| time_elapsed       | 4.54e+03       |
| total_timesteps    | 4377600        |
| value_loss         | 4.5274086      |


---------------------------------------
| approxkl           | 4.955969e-05   |
| clipfrac           | 0.00015625     |
| explained_variance | 0.973          |
| fps                | 1037           |
| n_updates          | 1384           |
| policy_entropy     | 0.14120804     |
| policy_loss        | -0.00011430591 |
| serial_timesteps   | 4428800        |
| time_elapsed       | 4.59e+03       |
| total_timesteps    | 4428800        |
| value_loss         | 4.046079       |
---------------------------------------
---------------------------------------
| approxkl           | 1.3038758e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.965          |
| fps                | 1011           |
| n_updates          | 1385           |
| policy_entropy     | 0.16347656     |
| policy_loss        | -9.9200755e-05 |
| serial_timesteps   | 4432000        |
| time_elapsed       | 4.6e+03        |
| total_timesteps    | 4432000        |
| value_loss         | 4.266202       |


--------------------------------------
| approxkl           | 0.00021104433 |
| clipfrac           | 0.00234375    |
| explained_variance | 0.968         |
| fps                | 1003          |
| n_updates          | 1401          |
| policy_entropy     | 0.11626343    |
| policy_loss        | 8.1485505e-06 |
| serial_timesteps   | 4483200       |
| time_elapsed       | 4.65e+03      |
| total_timesteps    | 4483200       |
| value_loss         | 5.2656193     |
--------------------------------------
---------------------------------------
| approxkl           | 7.726968e-05   |
| clipfrac           | 0.00015625     |
| explained_variance | 0.987          |
| fps                | 1008           |
| n_updates          | 1402           |
| policy_entropy     | 0.1921741      |
| policy_loss        | -0.00031061424 |
| serial_timesteps   | 4486400        |
| time_elapsed       | 4.65e+03       |
| total_timesteps    | 4486400        |
| value_loss         | 1.2549908      |
-------------

---------------------------------------
| approxkl           | 4.8180927e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.976          |
| fps                | 1001           |
| n_updates          | 1418           |
| policy_entropy     | 0.16986342     |
| policy_loss        | -8.3895775e-06 |
| serial_timesteps   | 4537600        |
| time_elapsed       | 4.7e+03        |
| total_timesteps    | 4537600        |
| value_loss         | 2.8363245      |
---------------------------------------
---------------------------------------
| approxkl           | 1.9606603e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.976          |
| fps                | 1004           |
| n_updates          | 1419           |
| policy_entropy     | 0.16634806     |
| policy_loss        | -0.00016258986 |
| serial_timesteps   | 4540800        |
| time_elapsed       | 4.71e+03       |
| total_timesteps    | 4540800        |
| value_loss         | 2.8026896      |


--------------------------------------
| approxkl           | 0.0009906173  |
| clipfrac           | 0.0109375     |
| explained_variance | 0.972         |
| fps                | 1038          |
| n_updates          | 1434          |
| policy_entropy     | 0.16510347    |
| policy_loss        | 0.00026529783 |
| serial_timesteps   | 4588800       |
| time_elapsed       | 4.75e+03      |
| total_timesteps    | 4588800       |
| value_loss         | 3.0018332     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00028425155  |
| clipfrac           | 0.0032812501   |
| explained_variance | 0.968          |
| fps                | 1047           |
| n_updates          | 1435           |
| policy_entropy     | 0.107484266    |
| policy_loss        | -6.0456023e-05 |
| serial_timesteps   | 4592000        |
| time_elapsed       | 4.76e+03       |
| total_timesteps    | 4592000        |
| value_loss         | 5.539808       |
-------------

---------------------------------------
| approxkl           | 0.0001380365   |
| clipfrac           | 0.000390625    |
| explained_variance | 0.964          |
| fps                | 963            |
| n_updates          | 1450           |
| policy_entropy     | 0.14788589     |
| policy_loss        | -9.4692936e-05 |
| serial_timesteps   | 4640000        |
| time_elapsed       | 4.8e+03        |
| total_timesteps    | 4640000        |
| value_loss         | 4.7732916      |
---------------------------------------
--------------------------------------
| approxkl           | 5.3571934e-05 |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.967         |
| fps                | 1027          |
| n_updates          | 1451          |
| policy_entropy     | 0.11572032    |
| policy_loss        | 1.0949373e-06 |
| serial_timesteps   | 4643200       |
| time_elapsed       | 4.81e+03      |
| total_timesteps    | 4643200       |
| value_loss         | 5.6507344     |
------------

--------------------------------------
| approxkl           | 1.5827412e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.963         |
| fps                | 1039          |
| n_updates          | 1466          |
| policy_entropy     | 0.09553197    |
| policy_loss        | 3.1437353e-06 |
| serial_timesteps   | 4691200       |
| time_elapsed       | 4.85e+03      |
| total_timesteps    | 4691200       |
| value_loss         | 5.708305      |
--------------------------------------
----------------------------------------
| approxkl           | 8.699368e-06    |
| clipfrac           | 0.0             |
| explained_variance | 0.972           |
| fps                | 1019            |
| n_updates          | 1467            |
| policy_entropy     | 0.12264907      |
| policy_loss        | -1.48234885e-05 |
| serial_timesteps   | 4694400         |
| time_elapsed       | 4.86e+03        |
| total_timesteps    | 4694400         |
| value_loss         | 4.7434607       |
-

--------------------------------------
| approxkl           | 5.7534606e-05 |
| clipfrac           | 0.00015625    |
| explained_variance | 0.972         |
| fps                | 985           |
| n_updates          | 1483          |
| policy_entropy     | 0.13614455    |
| policy_loss        | 9.1341135e-06 |
| serial_timesteps   | 4745600       |
| time_elapsed       | 4.91e+03      |
| total_timesteps    | 4745600       |
| value_loss         | 4.095364      |
--------------------------------------
--------------------------------------
| approxkl           | 9.3678056e-05 |
| clipfrac           | 0.000234375   |
| explained_variance | 0.972         |
| fps                | 998           |
| n_updates          | 1484          |
| policy_entropy     | 0.17636803    |
| policy_loss        | -6.805912e-05 |
| serial_timesteps   | 4748800       |
| time_elapsed       | 4.91e+03      |
| total_timesteps    | 4748800       |
| value_loss         | 3.924798      |
-------------------------

--------------------------------------
| approxkl           | 4.2925414e-05 |
| clipfrac           | 0.00015625    |
| explained_variance | 0.974         |
| fps                | 939           |
| n_updates          | 1500          |
| policy_entropy     | 0.15847507    |
| policy_loss        | -6.718162e-05 |
| serial_timesteps   | 4800000       |
| time_elapsed       | 4.97e+03      |
| total_timesteps    | 4800000       |
| value_loss         | 3.5272346     |
--------------------------------------
---------------------------------------
| approxkl           | 4.177331e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.97           |
| fps                | 1035           |
| n_updates          | 1501           |
| policy_entropy     | 0.219242       |
| policy_loss        | -3.9779712e-05 |
| serial_timesteps   | 4803200        |
| time_elapsed       | 4.97e+03       |
| total_timesteps    | 4803200        |
| value_loss         | 2.2239735      |
-------------

---------------------------------------
| approxkl           | 0.00044063738  |
| clipfrac           | 0.004921875    |
| explained_variance | 0.978          |
| fps                | 9              |
| n_updates          | 1516           |
| policy_entropy     | 0.17497782     |
| policy_loss        | -0.00015856221 |
| serial_timesteps   | 4851200        |
| time_elapsed       | 5.34e+03       |
| total_timesteps    | 4851200        |
| value_loss         | 2.8437822      |
---------------------------------------
---------------------------------------
| approxkl           | 0.00097438274  |
| clipfrac           | 0.010234375    |
| explained_variance | 0.968          |
| fps                | 1097           |
| n_updates          | 1517           |
| policy_entropy     | 0.22387877     |
| policy_loss        | -0.00056709046 |
| serial_timesteps   | 4854400        |
| time_elapsed       | 5.66e+03       |
| total_timesteps    | 4854400        |
| value_loss         | 0.31590372     |


---------------------------------------
| approxkl           | 0.00023246     |
| clipfrac           | 0.001484375    |
| explained_variance | -4.96          |
| fps                | 1181           |
| n_updates          | 1532           |
| policy_entropy     | 0.20832174     |
| policy_loss        | -0.00041194676 |
| serial_timesteps   | 4902400        |
| time_elapsed       | 5.73e+03       |
| total_timesteps    | 4902400        |
| value_loss         | 0.61837167     |
---------------------------------------
--------------------------------------
| approxkl           | 0.00025698333 |
| clipfrac           | 0.00171875    |
| explained_variance | 0.965         |
| fps                | 1184          |
| n_updates          | 1533          |
| policy_entropy     | 0.11737829    |
| policy_loss        | -9.867974e-05 |
| serial_timesteps   | 4905600       |
| time_elapsed       | 5.73e+03      |
| total_timesteps    | 4905600       |
| value_loss         | 5.163671      |
------------

--------------------------------------
| approxkl           | 0.00058714155 |
| clipfrac           | 0.00390625    |
| explained_variance | 0.977         |
| fps                | 1139          |
| n_updates          | 1549          |
| policy_entropy     | 0.16881725    |
| policy_loss        | 0.0006411596  |
| serial_timesteps   | 4956800       |
| time_elapsed       | 5.78e+03      |
| total_timesteps    | 4956800       |
| value_loss         | 3.1428747     |
--------------------------------------
-------------------------------------
| approxkl           | 0.000638787  |
| clipfrac           | 0.006328125  |
| explained_variance | 0.951        |
| fps                | 1166         |
| n_updates          | 1550         |
| policy_entropy     | 0.1387732    |
| policy_loss        | 0.0020091622 |
| serial_timesteps   | 4960000      |
| time_elapsed       | 5.78e+03     |
| total_timesteps    | 4960000      |
| value_loss         | 6.2282953    |
-------------------------------------

## Testing lesson3

In [65]:
# model_lesson3 = PPO2.load(model_names[3])
test(env_lesson3, model_lesson3, render = False)

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Episode 20 finished
Episode 21 finished
Episode 22 finished
Episode 23 finished
Episode 24 finished
Episode 25 finished
Episode 26 finished
Episode 27 finished
Episode 28 finished
Episode 29 finished
Episode 30 finished
Episode 31 finished
Episode 32 finished
Episode 33 finished
Episode 34 finished
Episode 35 finished
Episode 36 finished
Episode 37 finished
Episode 38 finished
Episode 39 finished
Episode 40 finished
Episode 41 finished
Episode 42 finished
Episode 43 finished
Episode 44 finished
Episode 45 finished
Episode 46 finished
Episode 47 finished
Episode 48 finished
Episode 49 finished
Episode 50

## Training lesson 4
### 11x11 grid with 32 wooden boxes

In [66]:
config_lesson4 = wood_box_lesson4_env()
env_lesson4 = initialize_env(config_lesson4)

In [67]:
# model_lesson3 = PPO2.load(load_path = model_names[3],
#                           tensorboard_log = "./ppo2_pommerman_box_collect_tensorboard/")
model_lesson4 = train(model_name = model_names[4], 
                      model = model_lesson3,
                      env = env_lesson4, 
                      n_steps = n_steps, 
                      total_timesteps = total_timestep)
# model_lesson4.save(model_names[4])

---------------------------------------
| approxkl           | 0.00011778335  |
| clipfrac           | 0.000234375    |
| explained_variance | 0.979          |
| fps                | 1103           |
| n_updates          | 1              |
| policy_entropy     | 0.16538613     |
| policy_loss        | -6.4058455e-05 |
| serial_timesteps   | 3200           |
| time_elapsed       | 1.91e-06       |
| total_timesteps    | 3200           |
| value_loss         | 2.1946805      |
---------------------------------------
---------------------------------------
| approxkl           | 1.4984969e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.966          |
| fps                | 1112           |
| n_updates          | 2              |
| policy_entropy     | 0.122646004    |
| policy_loss        | -5.7730304e-05 |
| serial_timesteps   | 6400           |
| time_elapsed       | 2.91           |
| total_timesteps    | 6400           |
| value_loss         | 4.0503445      |


--------------------------------------
| approxkl           | 0.003004672   |
| clipfrac           | 0.027265625   |
| explained_variance | -4.53         |
| fps                | 1039          |
| n_updates          | 18            |
| policy_entropy     | 0.21557593    |
| policy_loss        | -0.0006326508 |
| serial_timesteps   | 57600         |
| time_elapsed       | 49.5          |
| total_timesteps    | 57600         |
| value_loss         | 0.2570749     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00011939183  |
| clipfrac           | 0.00093750004  |
| explained_variance | 0.96           |
| fps                | 1104           |
| n_updates          | 19             |
| policy_entropy     | 0.10430935     |
| policy_loss        | -4.8181264e-05 |
| serial_timesteps   | 60800          |
| time_elapsed       | 52.6           |
| total_timesteps    | 60800          |
| value_loss         | 5.2871594      |
-------------

--------------------------------------
| approxkl           | 0.00030216516 |
| clipfrac           | 0.00296875    |
| explained_variance | 0.961         |
| fps                | 1020          |
| n_updates          | 34            |
| policy_entropy     | 0.17954674    |
| policy_loss        | 0.00037518138 |
| serial_timesteps   | 108800        |
| time_elapsed       | 99.8          |
| total_timesteps    | 108800        |
| value_loss         | 3.529798      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00010243518 |
| clipfrac           | 0.00078125    |
| explained_variance | 0.939         |
| fps                | 1036          |
| n_updates          | 35            |
| policy_entropy     | 0.118767835   |
| policy_loss        | 5.1750838e-05 |
| serial_timesteps   | 112000        |
| time_elapsed       | 103           |
| total_timesteps    | 112000        |
| value_loss         | 6.0841074     |
-------------------------

--------------------------------------
| approxkl           | 3.844592e-05  |
| clipfrac           | 7.8125e-05    |
| explained_variance | 0.973         |
| fps                | 878           |
| n_updates          | 51            |
| policy_entropy     | 0.13600591    |
| policy_loss        | -0.0001519391 |
| serial_timesteps   | 163200        |
| time_elapsed       | 149           |
| total_timesteps    | 163200        |
| value_loss         | 3.1163006     |
--------------------------------------
---------------------------------------
| approxkl           | 1.3861432e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.942          |
| fps                | 750            |
| n_updates          | 52             |
| policy_entropy     | 0.09201777     |
| policy_loss        | -4.5221073e-05 |
| serial_timesteps   | 166400         |
| time_elapsed       | 153            |
| total_timesteps    | 166400         |
| value_loss         | 7.810003       |
-------------

---------------------------------------
| approxkl           | 5.9940226e-07  |
| clipfrac           | 0.0            |
| explained_variance | 0.952          |
| fps                | 918            |
| n_updates          | 67             |
| policy_entropy     | 0.06338045     |
| policy_loss        | -2.5726856e-06 |
| serial_timesteps   | 214400         |
| time_elapsed       | 216            |
| total_timesteps    | 214400         |
| value_loss         | 6.5745707      |
---------------------------------------
---------------------------------------
| approxkl           | 2.6833932e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.969          |
| fps                | 845            |
| n_updates          | 68             |
| policy_entropy     | 0.1417643      |
| policy_loss        | -1.0418743e-05 |
| serial_timesteps   | 217600         |
| time_elapsed       | 220            |
| total_timesteps    | 217600         |
| value_loss         | 4.166771       |


--------------------------------------
| approxkl           | 1.998473e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.957         |
| fps                | 1004          |
| n_updates          | 83            |
| policy_entropy     | 0.14712074    |
| policy_loss        | -7.305753e-05 |
| serial_timesteps   | 265600        |
| time_elapsed       | 269           |
| total_timesteps    | 265600        |
| value_loss         | 3.8641894     |
--------------------------------------
-------------------------------------
| approxkl           | 7.577223e-06 |
| clipfrac           | 0.0          |
| explained_variance | 0.964        |
| fps                | 938          |
| n_updates          | 84           |
| policy_entropy     | 0.08662515   |
| policy_loss        | 2.602469e-06 |
| serial_timesteps   | 268800       |
| time_elapsed       | 272          |
| total_timesteps    | 268800       |
| value_loss         | 5.4979825    |
-------------------------------------

--------------------------------------
| approxkl           | 0.0006475122  |
| clipfrac           | 0.0074218754  |
| explained_variance | -3.08         |
| fps                | 1067          |
| n_updates          | 100           |
| policy_entropy     | 0.2260907     |
| policy_loss        | -0.0006751846 |
| serial_timesteps   | 320000        |
| time_elapsed       | 323           |
| total_timesteps    | 320000        |
| value_loss         | 0.9138979     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006333792  |
| clipfrac           | 0.008515625   |
| explained_variance | 0.931         |
| fps                | 1094          |
| n_updates          | 101           |
| policy_entropy     | 0.18342602    |
| policy_loss        | 0.00089945376 |
| serial_timesteps   | 323200        |
| time_elapsed       | 326           |
| total_timesteps    | 323200        |
| value_loss         | 6.8013306     |
-------------------------

---------------------------------------
| approxkl           | 0.00068566285  |
| clipfrac           | 0.0075000003   |
| explained_variance | 0.764          |
| fps                | 998            |
| n_updates          | 117            |
| policy_entropy     | 0.28931487     |
| policy_loss        | -1.4876798e-05 |
| serial_timesteps   | 374400         |
| time_elapsed       | 375            |
| total_timesteps    | 374400         |
| value_loss         | 3.200675       |
---------------------------------------
-------------------------------------
| approxkl           | 0.0007736186 |
| clipfrac           | 0.008203125  |
| explained_variance | 0.966        |
| fps                | 1025         |
| n_updates          | 118          |
| policy_entropy     | 0.23689142   |
| policy_loss        | 6.397054e-05 |
| serial_timesteps   | 377600       |
| time_elapsed       | 378          |
| total_timesteps    | 377600       |
| value_loss         | 2.8349817    |
------------------------

--------------------------------------
| approxkl           | 0.00035310624 |
| clipfrac           | 0.0042187497  |
| explained_variance | 0.962         |
| fps                | 1025          |
| n_updates          | 134           |
| policy_entropy     | 0.11035636    |
| policy_loss        | 6.347891e-07  |
| serial_timesteps   | 428800        |
| time_elapsed       | 430           |
| total_timesteps    | 428800        |
| value_loss         | 5.9224796     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00042935487 |
| clipfrac           | 0.00421875    |
| explained_variance | 0.936         |
| fps                | 977           |
| n_updates          | 135           |
| policy_entropy     | 0.18316829    |
| policy_loss        | -0.0002901834 |
| serial_timesteps   | 432000        |
| time_elapsed       | 433           |
| total_timesteps    | 432000        |
| value_loss         | 4.3772936     |
-------------------------

--------------------------------------
| approxkl           | 0.00012116457 |
| clipfrac           | 0.00039062498 |
| explained_variance | 0.72          |
| fps                | 901           |
| n_updates          | 150           |
| policy_entropy     | 0.22583796    |
| policy_loss        | -7.345825e-05 |
| serial_timesteps   | 480000        |
| time_elapsed       | 484           |
| total_timesteps    | 480000        |
| value_loss         | 1.6823161     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0022556954  |
| clipfrac           | 0.022343751   |
| explained_variance | 0.951         |
| fps                | 875           |
| n_updates          | 151           |
| policy_entropy     | 0.17723541    |
| policy_loss        | 0.00055186474 |
| serial_timesteps   | 483200        |
| time_elapsed       | 488           |
| total_timesteps    | 483200        |
| value_loss         | 3.2968473     |
-------------------------

--------------------------------------
| approxkl           | 0.00016075013 |
| clipfrac           | 0.0014062499  |
| explained_variance | 0.932         |
| fps                | 964           |
| n_updates          | 167           |
| policy_entropy     | 0.11128825    |
| policy_loss        | 2.4792702e-05 |
| serial_timesteps   | 534400        |
| time_elapsed       | 541           |
| total_timesteps    | 534400        |
| value_loss         | 6.057846      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00011620652 |
| clipfrac           | 0.000234375   |
| explained_variance | 0.93          |
| fps                | 972           |
| n_updates          | 168           |
| policy_entropy     | 0.19098619    |
| policy_loss        | 3.28622e-05   |
| serial_timesteps   | 537600        |
| time_elapsed       | 544           |
| total_timesteps    | 537600        |
| value_loss         | 3.4653215     |
-------------------------

--------------------------------------
| approxkl           | 5.402978e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.967         |
| fps                | 999           |
| n_updates          | 183           |
| policy_entropy     | 0.17043507    |
| policy_loss        | 3.7245454e-07 |
| serial_timesteps   | 585600        |
| time_elapsed       | 591           |
| total_timesteps    | 585600        |
| value_loss         | 5.3859806     |
--------------------------------------
--------------------------------------
| approxkl           | 6.6942184e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.963         |
| fps                | 1006          |
| n_updates          | 184           |
| policy_entropy     | 0.16631085    |
| policy_loss        | -2.768025e-05 |
| serial_timesteps   | 588800        |
| time_elapsed       | 595           |
| total_timesteps    | 588800        |
| value_loss         | 3.046387      |
-------------------------

--------------------------------------
| approxkl           | 3.146912e-05  |
| clipfrac           | 0.000234375   |
| explained_variance | 0.967         |
| fps                | 1004          |
| n_updates          | 200           |
| policy_entropy     | 0.21252187    |
| policy_loss        | 1.0159014e-05 |
| serial_timesteps   | 640000        |
| time_elapsed       | 643           |
| total_timesteps    | 640000        |
| value_loss         | 2.704288      |
--------------------------------------
---------------------------------------
| approxkl           | 0.00059559115  |
| clipfrac           | 0.002265625    |
| explained_variance | 0.947          |
| fps                | 1053           |
| n_updates          | 201            |
| policy_entropy     | 0.29914165     |
| policy_loss        | -0.00053072645 |
| serial_timesteps   | 643200         |
| time_elapsed       | 647            |
| total_timesteps    | 643200         |
| value_loss         | 6.931491       |
-------------

---------------------------------------
| approxkl           | 0.00010519283  |
| clipfrac           | 0.00046875     |
| explained_variance | 0.974          |
| fps                | 1102           |
| n_updates          | 217            |
| policy_entropy     | 0.20277423     |
| policy_loss        | -0.00032260336 |
| serial_timesteps   | 694400         |
| time_elapsed       | 697            |
| total_timesteps    | 694400         |
| value_loss         | 3.1992424      |
---------------------------------------
--------------------------------------
| approxkl           | 7.8113095e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.905         |
| fps                | 1150          |
| n_updates          | 218           |
| policy_entropy     | 0.17532939    |
| policy_loss        | 0.00021543665 |
| serial_timesteps   | 697600        |
| time_elapsed       | 699           |
| total_timesteps    | 697600        |
| value_loss         | 9.157115      |
------------

---------------------------------------
| approxkl           | 0.00021862308  |
| clipfrac           | 0.0            |
| explained_variance | 0.969          |
| fps                | 946            |
| n_updates          | 234            |
| policy_entropy     | 0.31367517     |
| policy_loss        | -0.00015124204 |
| serial_timesteps   | 748800         |
| time_elapsed       | 750            |
| total_timesteps    | 748800         |
| value_loss         | 3.8808713      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00013157136 |
| clipfrac           | 0.0           |
| explained_variance | 0.971         |
| fps                | 1020          |
| n_updates          | 235           |
| policy_entropy     | 0.33939546    |
| policy_loss        | 0.00018626079 |
| serial_timesteps   | 752000        |
| time_elapsed       | 753           |
| total_timesteps    | 752000        |
| value_loss         | 5.404568      |
------------

-------------------------------------
| approxkl           | 0.0005234793 |
| clipfrac           | 0.004453125  |
| explained_variance | -3.24        |
| fps                | 1025         |
| n_updates          | 251          |
| policy_entropy     | 0.22920083   |
| policy_loss        | -0.000726468 |
| serial_timesteps   | 803200       |
| time_elapsed       | 805          |
| total_timesteps    | 803200       |
| value_loss         | 0.21826708   |
-------------------------------------
--------------------------------------
| approxkl           | 0.00024803923 |
| clipfrac           | 0.00203125    |
| explained_variance | 0.966         |
| fps                | 1060          |
| n_updates          | 252           |
| policy_entropy     | 0.17201383    |
| policy_loss        | 3.9978644e-05 |
| serial_timesteps   | 806400        |
| time_elapsed       | 809           |
| total_timesteps    | 806400        |
| value_loss         | 2.8237154     |
--------------------------------------

--------------------------------------
| approxkl           | 0.00036149652 |
| clipfrac           | 0.0025        |
| explained_variance | 0.977         |
| fps                | 1208          |
| n_updates          | 268           |
| policy_entropy     | 0.19515392    |
| policy_loss        | 0.00019176706 |
| serial_timesteps   | 857600        |
| time_elapsed       | 852           |
| total_timesteps    | 857600        |
| value_loss         | 2.6100163     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00014595564 |
| clipfrac           | 0.000234375   |
| explained_variance | 0.965         |
| fps                | 1208          |
| n_updates          | 269           |
| policy_entropy     | 0.18563746    |
| policy_loss        | 8.499808e-06  |
| serial_timesteps   | 860800        |
| time_elapsed       | 855           |
| total_timesteps    | 860800        |
| value_loss         | 2.7587936     |
-------------------------

---------------------------------------
| approxkl           | 3.98253e-06    |
| clipfrac           | 0.0            |
| explained_variance | 0.961          |
| fps                | 913            |
| n_updates          | 285            |
| policy_entropy     | 0.15947549     |
| policy_loss        | -2.7710572e-05 |
| serial_timesteps   | 912000         |
| time_elapsed       | 899            |
| total_timesteps    | 912000         |
| value_loss         | 4.730736       |
---------------------------------------
--------------------------------------
| approxkl           | 3.5004139e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.976         |
| fps                | 939           |
| n_updates          | 286           |
| policy_entropy     | 0.18059234    |
| policy_loss        | -8.01161e-06  |
| serial_timesteps   | 915200        |
| time_elapsed       | 903           |
| total_timesteps    | 915200        |
| value_loss         | 2.6528726     |
------------

--------------------------------------
| approxkl           | 4.4783947e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.932         |
| fps                | 1018          |
| n_updates          | 302           |
| policy_entropy     | 0.062993705   |
| policy_loss        | 9.097513e-06  |
| serial_timesteps   | 966400        |
| time_elapsed       | 956           |
| total_timesteps    | 966400        |
| value_loss         | 8.000448      |
--------------------------------------
---------------------------------------
| approxkl           | 2.4470382e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.951          |
| fps                | 1055           |
| n_updates          | 303            |
| policy_entropy     | 0.07036128     |
| policy_loss        | -2.8135628e-06 |
| serial_timesteps   | 969600         |
| time_elapsed       | 959            |
| total_timesteps    | 969600         |
| value_loss         | 7.4024487      |
-------------

--------------------------------------
| approxkl           | 1.6426993e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.949         |
| fps                | 1221          |
| n_updates          | 318           |
| policy_entropy     | 0.072293006   |
| policy_loss        | 5.096644e-06  |
| serial_timesteps   | 1017600       |
| time_elapsed       | 1e+03         |
| total_timesteps    | 1017600       |
| value_loss         | 7.5449905     |
--------------------------------------
--------------------------------------
| approxkl           | 1.9154445e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.947         |
| fps                | 1196          |
| n_updates          | 319           |
| policy_entropy     | 0.06721099    |
| policy_loss        | 1.0194592e-05 |
| serial_timesteps   | 1020800       |
| time_elapsed       | 1.01e+03      |
| total_timesteps    | 1020800       |
| value_loss         | 7.973021      |
-------------------------

---------------------------------------
| approxkl           | 9.367548e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.974          |
| fps                | 1207           |
| n_updates          | 335            |
| policy_entropy     | 0.211552       |
| policy_loss        | -6.0333237e-05 |
| serial_timesteps   | 1072000        |
| time_elapsed       | 1.05e+03       |
| total_timesteps    | 1072000        |
| value_loss         | 2.966093       |
---------------------------------------
---------------------------------------
| approxkl           | 5.9067297e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.977          |
| fps                | 1201           |
| n_updates          | 336            |
| policy_entropy     | 0.15512083     |
| policy_loss        | -1.0167435e-06 |
| serial_timesteps   | 1075200        |
| time_elapsed       | 1.05e+03       |
| total_timesteps    | 1075200        |
| value_loss         | 3.3374693      |


--------------------------------------
| approxkl           | 9.711986e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.952         |
| fps                | 1220          |
| n_updates          | 352           |
| policy_entropy     | 0.108487055   |
| policy_loss        | 4.5814368e-06 |
| serial_timesteps   | 1126400       |
| time_elapsed       | 1.1e+03       |
| total_timesteps    | 1126400       |
| value_loss         | 6.1011677     |
--------------------------------------
--------------------------------------
| approxkl           | 1.3320549e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.976         |
| fps                | 1211          |
| n_updates          | 353           |
| policy_entropy     | 0.21084616    |
| policy_loss        | 1.2868866e-05 |
| serial_timesteps   | 1129600       |
| time_elapsed       | 1.1e+03       |
| total_timesteps    | 1129600       |
| value_loss         | 2.3184192     |
-------------------------

---------------------------------------
| approxkl           | 9.521487e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.947          |
| fps                | 1241           |
| n_updates          | 369            |
| policy_entropy     | 0.1115386      |
| policy_loss        | -3.4093115e-05 |
| serial_timesteps   | 1180800        |
| time_elapsed       | 1.14e+03       |
| total_timesteps    | 1180800        |
| value_loss         | 7.323288       |
---------------------------------------
---------------------------------------
| approxkl           | 1.2419358e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.976          |
| fps                | 1188           |
| n_updates          | 370            |
| policy_entropy     | 0.13650402     |
| policy_loss        | -1.2721271e-05 |
| serial_timesteps   | 1184000        |
| time_elapsed       | 1.15e+03       |
| total_timesteps    | 1184000        |
| value_loss         | 3.5381186      |


---------------------------------------
| approxkl           | 7.2922703e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.973          |
| fps                | 1199           |
| n_updates          | 386            |
| policy_entropy     | 0.17466606     |
| policy_loss        | -2.0362436e-06 |
| serial_timesteps   | 1235200        |
| time_elapsed       | 1.19e+03       |
| total_timesteps    | 1235200        |
| value_loss         | 2.8780942      |
---------------------------------------
-------------------------------------
| approxkl           | 2.323577e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.952        |
| fps                | 1231         |
| n_updates          | 387          |
| policy_entropy     | 0.12964125   |
| policy_loss        | 9.31152e-05  |
| serial_timesteps   | 1238400      |
| time_elapsed       | 1.19e+03     |
| total_timesteps    | 1238400      |
| value_loss         | 5.420205     |
------------------------

--------------------------------------
| approxkl           | 1.6530717e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.973         |
| fps                | 1216          |
| n_updates          | 402           |
| policy_entropy     | 0.14309382    |
| policy_loss        | 5.268827e-06  |
| serial_timesteps   | 1286400       |
| time_elapsed       | 1.23e+03      |
| total_timesteps    | 1286400       |
| value_loss         | 5.1293783     |
--------------------------------------
---------------------------------------
| approxkl           | 4.166085e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.96           |
| fps                | 1218           |
| n_updates          | 403            |
| policy_entropy     | 0.11726358     |
| policy_loss        | -1.3132245e-05 |
| serial_timesteps   | 1289600        |
| time_elapsed       | 1.23e+03       |
| total_timesteps    | 1289600        |
| value_loss         | 5.85666        |
-------------

---------------------------------------
| approxkl           | 0.00018466313  |
| clipfrac           | 0.00078125     |
| explained_variance | 0.978          |
| fps                | 1183           |
| n_updates          | 419            |
| policy_entropy     | 0.1686091      |
| policy_loss        | -3.9122853e-05 |
| serial_timesteps   | 1340800        |
| time_elapsed       | 1.27e+03       |
| total_timesteps    | 1340800        |
| value_loss         | 2.7525768      |
---------------------------------------
---------------------------------------
| approxkl           | 0.000101902755 |
| clipfrac           | 0.000234375    |
| explained_variance | 0.979          |
| fps                | 1203           |
| n_updates          | 420            |
| policy_entropy     | 0.16854508     |
| policy_loss        | -1.9459501e-05 |
| serial_timesteps   | 1344000        |
| time_elapsed       | 1.28e+03       |
| total_timesteps    | 1344000        |
| value_loss         | 3.1266441      |


--------------------------------------
| approxkl           | 2.3782328e-05 |
| clipfrac           | 0.00015625    |
| explained_variance | 0.963         |
| fps                | 1199          |
| n_updates          | 435           |
| policy_entropy     | 0.14511517    |
| policy_loss        | -0.0002302462 |
| serial_timesteps   | 1392000       |
| time_elapsed       | 1.32e+03      |
| total_timesteps    | 1392000       |
| value_loss         | 5.4481034     |
--------------------------------------
---------------------------------------
| approxkl           | 7.356557e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.933          |
| fps                | 1228           |
| n_updates          | 436            |
| policy_entropy     | 0.051565506    |
| policy_loss        | -2.8185174e-05 |
| serial_timesteps   | 1395200        |
| time_elapsed       | 1.32e+03       |
| total_timesteps    | 1395200        |
| value_loss         | 8.003425       |
-------------

--------------------------------------
| approxkl           | 2.3613247e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.964         |
| fps                | 1078          |
| n_updates          | 452           |
| policy_entropy     | 0.094672084   |
| policy_loss        | 3.5837293e-06 |
| serial_timesteps   | 1446400       |
| time_elapsed       | 1.37e+03      |
| total_timesteps    | 1446400       |
| value_loss         | 5.240752      |
--------------------------------------
--------------------------------------
| approxkl           | 4.7277767e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.934         |
| fps                | 1126          |
| n_updates          | 453           |
| policy_entropy     | 0.05803579    |
| policy_loss        | 3.3858065e-05 |
| serial_timesteps   | 1449600       |
| time_elapsed       | 1.37e+03      |
| total_timesteps    | 1449600       |
| value_loss         | 7.599522      |
-------------------------

--------------------------------------
| approxkl           | 1.3302051e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.972         |
| fps                | 1177          |
| n_updates          | 469           |
| policy_entropy     | 0.17782968    |
| policy_loss        | -1.833383e-05 |
| serial_timesteps   | 1500800       |
| time_elapsed       | 1.41e+03      |
| total_timesteps    | 1500800       |
| value_loss         | 2.461094      |
--------------------------------------
---------------------------------------
| approxkl           | 8.487466e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.971          |
| fps                | 1247           |
| n_updates          | 470            |
| policy_entropy     | 0.10762333     |
| policy_loss        | -2.3282992e-05 |
| serial_timesteps   | 1504000        |
| time_elapsed       | 1.41e+03       |
| total_timesteps    | 1504000        |
| value_loss         | 6.254597       |
-------------

---------------------------------------
| approxkl           | 1.1582139e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.978          |
| fps                | 1123           |
| n_updates          | 485            |
| policy_entropy     | 0.17110337     |
| policy_loss        | -2.2067761e-05 |
| serial_timesteps   | 1552000        |
| time_elapsed       | 1.46e+03       |
| total_timesteps    | 1552000        |
| value_loss         | 3.7875383      |
---------------------------------------
--------------------------------------
| approxkl           | 6.111024e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.977         |
| fps                | 1161          |
| n_updates          | 486           |
| policy_entropy     | 0.11740692    |
| policy_loss        | -6.638542e-06 |
| serial_timesteps   | 1555200       |
| time_elapsed       | 1.46e+03      |
| total_timesteps    | 1555200       |
| value_loss         | 4.036463      |
------------

---------------------------------------
| approxkl           | 1.9487012e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.961          |
| fps                | 960            |
| n_updates          | 501            |
| policy_entropy     | 0.07553468     |
| policy_loss        | -2.2803917e-05 |
| serial_timesteps   | 1603200        |
| time_elapsed       | 1.5e+03        |
| total_timesteps    | 1603200        |
| value_loss         | 6.7186275      |
---------------------------------------
---------------------------------------
| approxkl           | 0.00022347318  |
| clipfrac           | 0.0014062499   |
| explained_variance | 0.97           |
| fps                | 998            |
| n_updates          | 502            |
| policy_entropy     | 0.14845666     |
| policy_loss        | -2.3513734e-05 |
| serial_timesteps   | 1606400        |
| time_elapsed       | 1.51e+03       |
| total_timesteps    | 1606400        |
| value_loss         | 5.439986       |


--------------------------------------
| approxkl           | 0.00038500392 |
| clipfrac           | 0.0051562497  |
| explained_variance | 0.963         |
| fps                | 1190          |
| n_updates          | 517           |
| policy_entropy     | 0.10761961    |
| policy_loss        | 4.6919435e-05 |
| serial_timesteps   | 1654400       |
| time_elapsed       | 1.55e+03      |
| total_timesteps    | 1654400       |
| value_loss         | 5.2415943     |
--------------------------------------
--------------------------------------
| approxkl           | 0.000293751   |
| clipfrac           | 0.0025781249  |
| explained_variance | 0.973         |
| fps                | 1204          |
| n_updates          | 518           |
| policy_entropy     | 0.15835884    |
| policy_loss        | -9.507328e-05 |
| serial_timesteps   | 1657600       |
| time_elapsed       | 1.55e+03      |
| total_timesteps    | 1657600       |
| value_loss         | 4.2554507     |
-------------------------

---------------------------------------
| approxkl           | 6.8487475e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.965          |
| fps                | 1157           |
| n_updates          | 533            |
| policy_entropy     | 0.105305634    |
| policy_loss        | -1.2933687e-05 |
| serial_timesteps   | 1705600        |
| time_elapsed       | 1.59e+03       |
| total_timesteps    | 1705600        |
| value_loss         | 6.739891       |
---------------------------------------
---------------------------------------
| approxkl           | 1.709227e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.972          |
| fps                | 1168           |
| n_updates          | 534            |
| policy_entropy     | 0.15869588     |
| policy_loss        | -0.00012371354 |
| serial_timesteps   | 1708800        |
| time_elapsed       | 1.6e+03        |
| total_timesteps    | 1708800        |
| value_loss         | 4.602706       |


---------------------------------------
| approxkl           | 2.0928954e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.96           |
| fps                | 1143           |
| n_updates          | 549            |
| policy_entropy     | 0.19030957     |
| policy_loss        | -0.00010648384 |
| serial_timesteps   | 1756800        |
| time_elapsed       | 1.64e+03       |
| total_timesteps    | 1756800        |
| value_loss         | 4.109274       |
---------------------------------------
----------------------------------------
| approxkl           | 2.7658555e-06   |
| clipfrac           | 0.0             |
| explained_variance | 0.949           |
| fps                | 1209            |
| n_updates          | 550             |
| policy_entropy     | 0.075636104     |
| policy_loss        | -1.28313895e-05 |
| serial_timesteps   | 1760000         |
| time_elapsed       | 1.64e+03        |
| total_timesteps    | 1760000         |
| value_loss         | 7.9410

--------------------------------------
| approxkl           | 9.221105e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.965         |
| fps                | 1192          |
| n_updates          | 565           |
| policy_entropy     | 0.1143543     |
| policy_loss        | -8.434578e-06 |
| serial_timesteps   | 1808000       |
| time_elapsed       | 1.68e+03      |
| total_timesteps    | 1808000       |
| value_loss         | 6.6510944     |
--------------------------------------
--------------------------------------
| approxkl           | 7.851694e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.976         |
| fps                | 1184          |
| n_updates          | 566           |
| policy_entropy     | 0.16743559    |
| policy_loss        | -5.394444e-06 |
| serial_timesteps   | 1811200       |
| time_elapsed       | 1.69e+03      |
| total_timesteps    | 1811200       |
| value_loss         | 3.1911714     |
-------------------------

--------------------------------------
| approxkl           | 0.00036178547 |
| clipfrac           | 0.0051562497  |
| explained_variance | 0.925         |
| fps                | 1148          |
| n_updates          | 582           |
| policy_entropy     | 0.08153601    |
| policy_loss        | 0.0001356747  |
| serial_timesteps   | 1862400       |
| time_elapsed       | 1.73e+03      |
| total_timesteps    | 1862400       |
| value_loss         | 8.003318      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00038671843 |
| clipfrac           | 0.00296875    |
| explained_variance | 0.974         |
| fps                | 1113          |
| n_updates          | 583           |
| policy_entropy     | 0.20143625    |
| policy_loss        | 7.692985e-05  |
| serial_timesteps   | 1865600       |
| time_elapsed       | 1.73e+03      |
| total_timesteps    | 1865600       |
| value_loss         | 2.6892564     |
-------------------------

--------------------------------------
| approxkl           | 1.1522645e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.963         |
| fps                | 1108          |
| n_updates          | 599           |
| policy_entropy     | 0.12555698    |
| policy_loss        | 3.1419097e-06 |
| serial_timesteps   | 1916800       |
| time_elapsed       | 1.78e+03      |
| total_timesteps    | 1916800       |
| value_loss         | 5.4257226     |
--------------------------------------
--------------------------------------
| approxkl           | 3.5948078e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.971         |
| fps                | 1169          |
| n_updates          | 600           |
| policy_entropy     | 0.16495025    |
| policy_loss        | -1.719594e-05 |
| serial_timesteps   | 1920000       |
| time_elapsed       | 1.78e+03      |
| total_timesteps    | 1920000       |
| value_loss         | 4.2381997     |
-------------------------

--------------------------------------
| approxkl           | 3.675883e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.973         |
| fps                | 1041          |
| n_updates          | 615           |
| policy_entropy     | 0.19765522    |
| policy_loss        | -2.466619e-05 |
| serial_timesteps   | 1968000       |
| time_elapsed       | 1.83e+03      |
| total_timesteps    | 1968000       |
| value_loss         | 2.588557      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00046584063 |
| clipfrac           | 0.003125      |
| explained_variance | 0.976         |
| fps                | 1036          |
| n_updates          | 616           |
| policy_entropy     | 0.20974877    |
| policy_loss        | -0.0003448067 |
| serial_timesteps   | 1971200       |
| time_elapsed       | 1.83e+03      |
| total_timesteps    | 1971200       |
| value_loss         | 3.0222225     |
-------------------------

---------------------------------------
| approxkl           | 4.9546084e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.961          |
| fps                | 1133           |
| n_updates          | 631            |
| policy_entropy     | 0.13392138     |
| policy_loss        | -9.1771035e-06 |
| serial_timesteps   | 2019200        |
| time_elapsed       | 1.88e+03       |
| total_timesteps    | 2019200        |
| value_loss         | 5.398702       |
---------------------------------------
--------------------------------------
| approxkl           | 6.4408277e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.968         |
| fps                | 1123          |
| n_updates          | 632           |
| policy_entropy     | 0.1299482     |
| policy_loss        | -7.760435e-05 |
| serial_timesteps   | 2022400       |
| time_elapsed       | 1.88e+03      |
| total_timesteps    | 2022400       |
| value_loss         | 5.041477      |
------------

--------------------------------------
| approxkl           | 2.4864219e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.953         |
| fps                | 1190          |
| n_updates          | 647           |
| policy_entropy     | 0.07850801    |
| policy_loss        | 5.480014e-06  |
| serial_timesteps   | 2070400       |
| time_elapsed       | 1.92e+03      |
| total_timesteps    | 2070400       |
| value_loss         | 5.704701      |
--------------------------------------
--------------------------------------
| approxkl           | 2.2640866e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.96          |
| fps                | 1191          |
| n_updates          | 648           |
| policy_entropy     | 0.15068051    |
| policy_loss        | 1.3512558e-05 |
| serial_timesteps   | 2073600       |
| time_elapsed       | 1.93e+03      |
| total_timesteps    | 2073600       |
| value_loss         | 4.580581      |
-------------------------

--------------------------------------
| approxkl           | 0.00030733045 |
| clipfrac           | 0.0040625     |
| explained_variance | 0.966         |
| fps                | 1216          |
| n_updates          | 663           |
| policy_entropy     | 0.19106524    |
| policy_loss        | -0.0003287317 |
| serial_timesteps   | 2121600       |
| time_elapsed       | 1.97e+03      |
| total_timesteps    | 2121600       |
| value_loss         | 5.0215206     |
--------------------------------------
-------------------------------------
| approxkl           | 9.040673e-06 |
| clipfrac           | 0.0          |
| explained_variance | 0.973        |
| fps                | 1147         |
| n_updates          | 664          |
| policy_entropy     | 0.20453322   |
| policy_loss        | -3.74645e-05 |
| serial_timesteps   | 2124800      |
| time_elapsed       | 1.97e+03     |
| total_timesteps    | 2124800      |
| value_loss         | 2.9412947    |
-------------------------------------

-------------------------------------
| approxkl           | 0.0015538946 |
| clipfrac           | 0.023359375  |
| explained_variance | 0.941        |
| fps                | 1084         |
| n_updates          | 680          |
| policy_entropy     | 0.23000397   |
| policy_loss        | 0.0019044782 |
| serial_timesteps   | 2176000      |
| time_elapsed       | 2.02e+03     |
| total_timesteps    | 2176000      |
| value_loss         | 8.016732     |
-------------------------------------
--------------------------------------
| approxkl           | 0.0004000762  |
| clipfrac           | 0.00359375    |
| explained_variance | 0.966         |
| fps                | 1065          |
| n_updates          | 681           |
| policy_entropy     | 0.20163609    |
| policy_loss        | -7.906117e-05 |
| serial_timesteps   | 2179200       |
| time_elapsed       | 2.02e+03      |
| total_timesteps    | 2179200       |
| value_loss         | 3.4707046     |
--------------------------------------

---------------------------------------
| approxkl           | 0.00023208547  |
| clipfrac           | 0.00171875     |
| explained_variance | 0.975          |
| fps                | 1175           |
| n_updates          | 697            |
| policy_entropy     | 0.18564627     |
| policy_loss        | -6.5740045e-05 |
| serial_timesteps   | 2230400        |
| time_elapsed       | 2.07e+03       |
| total_timesteps    | 2230400        |
| value_loss         | 1.547926       |
---------------------------------------
---------------------------------------
| approxkl           | 8.756209e-05   |
| clipfrac           | 0.000390625    |
| explained_variance | 0.949          |
| fps                | 1218           |
| n_updates          | 698            |
| policy_entropy     | 0.11215712     |
| policy_loss        | -1.4793873e-05 |
| serial_timesteps   | 2233600        |
| time_elapsed       | 2.07e+03       |
| total_timesteps    | 2233600        |
| value_loss         | 6.7291512      |


---------------------------------------
| approxkl           | 1.2305571e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.965          |
| fps                | 1088           |
| n_updates          | 713            |
| policy_entropy     | 0.18774073     |
| policy_loss        | -2.5082603e-05 |
| serial_timesteps   | 2281600        |
| time_elapsed       | 2.11e+03       |
| total_timesteps    | 2281600        |
| value_loss         | 2.7705965      |
---------------------------------------
---------------------------------------
| approxkl           | 2.528053e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.946          |
| fps                | 1105           |
| n_updates          | 714            |
| policy_entropy     | 0.1852265      |
| policy_loss        | -0.00016484044 |
| serial_timesteps   | 2284800        |
| time_elapsed       | 2.11e+03       |
| total_timesteps    | 2284800        |
| value_loss         | 3.1481245      |


--------------------------------------
| approxkl           | 5.6262807e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.955         |
| fps                | 1034          |
| n_updates          | 729           |
| policy_entropy     | 0.09819271    |
| policy_loss        | 4.511252e-06  |
| serial_timesteps   | 2332800       |
| time_elapsed       | 2.16e+03      |
| total_timesteps    | 2332800       |
| value_loss         | 6.8272634     |
--------------------------------------
---------------------------------------
| approxkl           | 3.3810718e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.961          |
| fps                | 1033           |
| n_updates          | 730            |
| policy_entropy     | 0.11261587     |
| policy_loss        | -4.6447294e-06 |
| serial_timesteps   | 2336000        |
| time_elapsed       | 2.16e+03       |
| total_timesteps    | 2336000        |
| value_loss         | 5.378715       |
-------------

---------------------------------------
| approxkl           | 2.8307757e-05  |
| clipfrac           | 0.0003125      |
| explained_variance | 0.931          |
| fps                | 1069           |
| n_updates          | 746            |
| policy_entropy     | 0.09046411     |
| policy_loss        | -0.00024849526 |
| serial_timesteps   | 2387200        |
| time_elapsed       | 2.21e+03       |
| total_timesteps    | 2387200        |
| value_loss         | 8.071125       |
---------------------------------------
---------------------------------------
| approxkl           | 1.1039038e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.968          |
| fps                | 1074           |
| n_updates          | 747            |
| policy_entropy     | 0.1594468      |
| policy_loss        | -2.0008236e-05 |
| serial_timesteps   | 2390400        |
| time_elapsed       | 2.21e+03       |
| total_timesteps    | 2390400        |
| value_loss         | 2.628674       |


---------------------------------------
| approxkl           | 1.208665e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.959          |
| fps                | 1024           |
| n_updates          | 762            |
| policy_entropy     | 0.15593848     |
| policy_loss        | -2.8834344e-05 |
| serial_timesteps   | 2438400        |
| time_elapsed       | 2.26e+03       |
| total_timesteps    | 2438400        |
| value_loss         | 2.9147022      |
---------------------------------------
-------------------------------------
| approxkl           | 7.104647e-06 |
| clipfrac           | 0.0          |
| explained_variance | 0.96         |
| fps                | 1063         |
| n_updates          | 763          |
| policy_entropy     | 0.11196107   |
| policy_loss        | 3.805384e-06 |
| serial_timesteps   | 2441600      |
| time_elapsed       | 2.26e+03     |
| total_timesteps    | 2441600      |
| value_loss         | 5.303514     |
------------------------

--------------------------------------
| approxkl           | 0.00011162607 |
| clipfrac           | 0.00046875    |
| explained_variance | 0.97          |
| fps                | 1065          |
| n_updates          | 779           |
| policy_entropy     | 0.18773203    |
| policy_loss        | -0.0001510428 |
| serial_timesteps   | 2492800       |
| time_elapsed       | 2.31e+03      |
| total_timesteps    | 2492800       |
| value_loss         | 2.6839278     |
--------------------------------------
---------------------------------------
| approxkl           | 0.0010062202   |
| clipfrac           | 0.00984375     |
| explained_variance | 0.923          |
| fps                | 1073           |
| n_updates          | 780            |
| policy_entropy     | 0.23597935     |
| policy_loss        | -0.00022397644 |
| serial_timesteps   | 2496000        |
| time_elapsed       | 2.31e+03       |
| total_timesteps    | 2496000        |
| value_loss         | 0.9470484      |
-------------

-------------------------------------
| approxkl           | 0.0130437575 |
| clipfrac           | 0.06726563   |
| explained_variance | -0.672       |
| fps                | 1137         |
| n_updates          | 796          |
| policy_entropy     | 0.2123188    |
| policy_loss        | -0.002388017 |
| serial_timesteps   | 2547200      |
| time_elapsed       | 2.36e+03     |
| total_timesteps    | 2547200      |
| value_loss         | 0.21898401   |
-------------------------------------
---------------------------------------
| approxkl           | 0.0023594876   |
| clipfrac           | 0.023671875    |
| explained_variance | -3.48          |
| fps                | 1069           |
| n_updates          | 797            |
| policy_entropy     | 0.2105728      |
| policy_loss        | -0.00019846622 |
| serial_timesteps   | 2550400        |
| time_elapsed       | 2.36e+03       |
| total_timesteps    | 2550400        |
| value_loss         | 1.9220612      |
--------------------------

--------------------------------------
| approxkl           | 0.004867233   |
| clipfrac           | 0.051484376   |
| explained_variance | -0.037        |
| fps                | 1055          |
| n_updates          | 813           |
| policy_entropy     | 0.23127365    |
| policy_loss        | -0.0018504383 |
| serial_timesteps   | 2601600       |
| time_elapsed       | 2.41e+03      |
| total_timesteps    | 2601600       |
| value_loss         | 0.064482875   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0029344852 |
| clipfrac           | 0.031640626  |
| explained_variance | -0.37        |
| fps                | 1029         |
| n_updates          | 814          |
| policy_entropy     | 0.23795094   |
| policy_loss        | -0.000250462 |
| serial_timesteps   | 2604800      |
| time_elapsed       | 2.41e+03     |
| total_timesteps    | 2604800      |
| value_loss         | 0.44619125   |
-------------------------------------

--------------------------------------
| approxkl           | 0.008507696   |
| clipfrac           | 0.055703126   |
| explained_variance | 0.815         |
| fps                | 1105          |
| n_updates          | 830           |
| policy_entropy     | 0.22064109    |
| policy_loss        | -0.0037393202 |
| serial_timesteps   | 2656000       |
| time_elapsed       | 2.46e+03      |
| total_timesteps    | 2656000       |
| value_loss         | 0.028816648   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0031102912 |
| clipfrac           | 0.029765625  |
| explained_variance | 0.849        |
| fps                | 1082         |
| n_updates          | 831          |
| policy_entropy     | 0.2125794    |
| policy_loss        | -0.001694096 |
| serial_timesteps   | 2659200      |
| time_elapsed       | 2.46e+03     |
| total_timesteps    | 2659200      |
| value_loss         | 0.020781027  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0044428147  |
| clipfrac           | 0.043046873   |
| explained_variance | 0.646         |
| fps                | 1025          |
| n_updates          | 847           |
| policy_entropy     | 0.21064235    |
| policy_loss        | -0.0026709922 |
| serial_timesteps   | 2710400       |
| time_elapsed       | 2.51e+03      |
| total_timesteps    | 2710400       |
| value_loss         | 0.024160609   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015779769  |
| clipfrac           | 0.016796876   |
| explained_variance | 0.7           |
| fps                | 1004          |
| n_updates          | 848           |
| policy_entropy     | 0.22160174    |
| policy_loss        | -0.0029102997 |
| serial_timesteps   | 2713600       |
| time_elapsed       | 2.51e+03      |
| total_timesteps    | 2713600       |
| value_loss         | 0.016937427   |
-------------------------

-------------------------------------
| approxkl           | 0.0011677508 |
| clipfrac           | 0.011796875  |
| explained_variance | 0.634        |
| fps                | 1029         |
| n_updates          | 864          |
| policy_entropy     | 0.18486771   |
| policy_loss        | 0.002019374  |
| serial_timesteps   | 2764800      |
| time_elapsed       | 2.56e+03     |
| total_timesteps    | 2764800      |
| value_loss         | 5.8938265    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0022756443  |
| clipfrac           | 0.0265625     |
| explained_variance | 0.835         |
| fps                | 983           |
| n_updates          | 865           |
| policy_entropy     | 0.17933609    |
| policy_loss        | -0.0012600913 |
| serial_timesteps   | 2768000       |
| time_elapsed       | 2.56e+03      |
| total_timesteps    | 2768000       |
| value_loss         | 0.01587994    |
--------------------------------------

--------------------------------------
| approxkl           | 0.00056767283 |
| clipfrac           | 0.007109375   |
| explained_variance | 0.868         |
| fps                | 995           |
| n_updates          | 881           |
| policy_entropy     | 0.16951878    |
| policy_loss        | -0.0013604455 |
| serial_timesteps   | 2819200       |
| time_elapsed       | 2.61e+03      |
| total_timesteps    | 2819200       |
| value_loss         | 0.020646349   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006574707  |
| clipfrac           | 0.0073437495  |
| explained_variance | 0.82          |
| fps                | 997           |
| n_updates          | 882           |
| policy_entropy     | 0.16033149    |
| policy_loss        | -0.0013976682 |
| serial_timesteps   | 2822400       |
| time_elapsed       | 2.62e+03      |
| total_timesteps    | 2822400       |
| value_loss         | 0.02389629    |
-------------------------

--------------------------------------
| approxkl           | 0.00056230533 |
| clipfrac           | 0.006015625   |
| explained_variance | 0.815         |
| fps                | 1144          |
| n_updates          | 898           |
| policy_entropy     | 0.13246867    |
| policy_loss        | 0.0003957367  |
| serial_timesteps   | 2873600       |
| time_elapsed       | 2.66e+03      |
| total_timesteps    | 2873600       |
| value_loss         | 0.5323667     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00084568077 |
| clipfrac           | 0.0103124995  |
| explained_variance | 0.872         |
| fps                | 1151          |
| n_updates          | 899           |
| policy_entropy     | 0.13672726    |
| policy_loss        | 7.191155e-05  |
| serial_timesteps   | 2876800       |
| time_elapsed       | 2.66e+03      |
| total_timesteps    | 2876800       |
| value_loss         | 0.25589412    |
-------------------------

---------------------------------------
| approxkl           | 0.00040066137  |
| clipfrac           | 0.005          |
| explained_variance | 0.687          |
| fps                | 1133           |
| n_updates          | 915            |
| policy_entropy     | 0.14726773     |
| policy_loss        | -0.00036806113 |
| serial_timesteps   | 2928000        |
| time_elapsed       | 2.71e+03       |
| total_timesteps    | 2928000        |
| value_loss         | 0.039266832    |
---------------------------------------
-------------------------------------
| approxkl           | 0.0013730694 |
| clipfrac           | 0.015546875  |
| explained_variance | 0.755        |
| fps                | 1122         |
| n_updates          | 916          |
| policy_entropy     | 0.1380661    |
| policy_loss        | -0.001713911 |
| serial_timesteps   | 2931200      |
| time_elapsed       | 2.71e+03     |
| total_timesteps    | 2931200      |
| value_loss         | 0.015072482  |
------------------------

--------------------------------------
| approxkl           | 0.00015714113 |
| clipfrac           | 0.001171875   |
| explained_variance | 0.801         |
| fps                | 1141          |
| n_updates          | 932           |
| policy_entropy     | 0.11183961    |
| policy_loss        | 9.116381e-06  |
| serial_timesteps   | 2982400       |
| time_elapsed       | 2.76e+03      |
| total_timesteps    | 2982400       |
| value_loss         | 4.8991346     |
--------------------------------------
---------------------------------------
| approxkl           | 0.00023924322  |
| clipfrac           | 0.00234375     |
| explained_variance | 0.88           |
| fps                | 1136           |
| n_updates          | 933            |
| policy_entropy     | 0.15381837     |
| policy_loss        | -0.00027217763 |
| serial_timesteps   | 2985600        |
| time_elapsed       | 2.76e+03       |
| total_timesteps    | 2985600        |
| value_loss         | 0.22179595     |
-------------

--------------------------------------
| approxkl           | 0.0033188066  |
| clipfrac           | 0.029140625   |
| explained_variance | 0.863         |
| fps                | 1163          |
| n_updates          | 949           |
| policy_entropy     | 0.15507299    |
| policy_loss        | -0.0019704818 |
| serial_timesteps   | 3036800       |
| time_elapsed       | 2.8e+03       |
| total_timesteps    | 3036800       |
| value_loss         | 0.014435062   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0021767481  |
| clipfrac           | 0.025078125   |
| explained_variance | 0.906         |
| fps                | 1170          |
| n_updates          | 950           |
| policy_entropy     | 0.14912128    |
| policy_loss        | -0.0018574626 |
| serial_timesteps   | 3040000       |
| time_elapsed       | 2.81e+03      |
| total_timesteps    | 3040000       |
| value_loss         | 0.012525966   |
-------------------------

-------------------------------------
| approxkl           | 0.0029714224 |
| clipfrac           | 0.029609375  |
| explained_variance | 0.88         |
| fps                | 1166         |
| n_updates          | 966          |
| policy_entropy     | 0.1482028    |
| policy_loss        | -0.000887054 |
| serial_timesteps   | 3091200      |
| time_elapsed       | 2.85e+03     |
| total_timesteps    | 3091200      |
| value_loss         | 0.016166518  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0017575037  |
| clipfrac           | 0.0203125     |
| explained_variance | 0.92          |
| fps                | 1154          |
| n_updates          | 967           |
| policy_entropy     | 0.13395229    |
| policy_loss        | -0.0008384946 |
| serial_timesteps   | 3094400       |
| time_elapsed       | 2.85e+03      |
| total_timesteps    | 3094400       |
| value_loss         | 0.008972385   |
--------------------------------------

-------------------------------------
| approxkl           | 0.0006994649 |
| clipfrac           | 0.0064843753 |
| explained_variance | 0.742        |
| fps                | 1203         |
| n_updates          | 983          |
| policy_entropy     | 0.11247985   |
| policy_loss        | 0.0005171698 |
| serial_timesteps   | 3145600      |
| time_elapsed       | 2.9e+03      |
| total_timesteps    | 3145600      |
| value_loss         | 2.682199     |
-------------------------------------
--------------------------------------
| approxkl           | 0.00015014898 |
| clipfrac           | 0.0016406251  |
| explained_variance | 0.62          |
| fps                | 1178          |
| n_updates          | 984           |
| policy_entropy     | 0.1284096     |
| policy_loss        | 7.299625e-05  |
| serial_timesteps   | 3148800       |
| time_elapsed       | 2.9e+03       |
| total_timesteps    | 3148800       |
| value_loss         | 2.6636128     |
--------------------------------------

--------------------------------------
| approxkl           | 0.0022297725  |
| clipfrac           | 0.01984375    |
| explained_variance | 0.962         |
| fps                | 1105          |
| n_updates          | 1000          |
| policy_entropy     | 0.12677704    |
| policy_loss        | -0.0027329684 |
| serial_timesteps   | 3200000       |
| time_elapsed       | 2.95e+03      |
| total_timesteps    | 3200000       |
| value_loss         | 0.030546647   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0025575636 |
| clipfrac           | 0.025781251  |
| explained_variance | 0.771        |
| fps                | 1109         |
| n_updates          | 1001         |
| policy_entropy     | 0.12995341   |
| policy_loss        | -0.000610661 |
| serial_timesteps   | 3203200      |
| time_elapsed       | 2.95e+03     |
| total_timesteps    | 3203200      |
| value_loss         | 0.023129927  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0009177348  |
| clipfrac           | 0.008437499   |
| explained_variance | 0.939         |
| fps                | 1008          |
| n_updates          | 1017          |
| policy_entropy     | 0.10692794    |
| policy_loss        | -0.0011993868 |
| serial_timesteps   | 3254400       |
| time_elapsed       | 3e+03         |
| total_timesteps    | 3254400       |
| value_loss         | 0.0063648745  |
--------------------------------------
-------------------------------------
| approxkl           | 0.000826914  |
| clipfrac           | 0.01015625   |
| explained_variance | 0.948        |
| fps                | 1005         |
| n_updates          | 1018         |
| policy_entropy     | 0.12145032   |
| policy_loss        | -0.001515768 |
| serial_timesteps   | 3257600      |
| time_elapsed       | 3e+03        |
| total_timesteps    | 3257600      |
| value_loss         | 0.0060292208 |
-------------------------------------

--------------------------------------
| approxkl           | 0.0011464977  |
| clipfrac           | 0.014453124   |
| explained_variance | 0.908         |
| fps                | 1123          |
| n_updates          | 1034          |
| policy_entropy     | 0.12310311    |
| policy_loss        | -0.0010172941 |
| serial_timesteps   | 3308800       |
| time_elapsed       | 3.05e+03      |
| total_timesteps    | 3308800       |
| value_loss         | 0.0092074955  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015174351  |
| clipfrac           | 0.017109375   |
| explained_variance | 0.789         |
| fps                | 1142          |
| n_updates          | 1035          |
| policy_entropy     | 0.13818914    |
| policy_loss        | -0.0031248427 |
| serial_timesteps   | 3312000       |
| time_elapsed       | 3.05e+03      |
| total_timesteps    | 3312000       |
| value_loss         | 0.025253046   |
-------------------------

--------------------------------------
| approxkl           | 0.00082487956 |
| clipfrac           | 0.009140626   |
| explained_variance | 0.433         |
| fps                | 1017          |
| n_updates          | 1051          |
| policy_entropy     | 0.13393171    |
| policy_loss        | -0.0016655169 |
| serial_timesteps   | 3363200       |
| time_elapsed       | 3.1e+03       |
| total_timesteps    | 3363200       |
| value_loss         | 0.04268328    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015108604  |
| clipfrac           | 0.014609375   |
| explained_variance | 0.624         |
| fps                | 933           |
| n_updates          | 1052          |
| policy_entropy     | 0.12507822    |
| policy_loss        | -0.0015646028 |
| serial_timesteps   | 3366400       |
| time_elapsed       | 3.1e+03       |
| total_timesteps    | 3366400       |
| value_loss         | 0.023626536   |
-------------------------

--------------------------------------
| approxkl           | 0.00013422672 |
| clipfrac           | 0.001015625   |
| explained_variance | 0.302         |
| fps                | 1200          |
| n_updates          | 1068          |
| policy_entropy     | 0.087062575   |
| policy_loss        | 4.679203e-05  |
| serial_timesteps   | 3417600       |
| time_elapsed       | 3.15e+03      |
| total_timesteps    | 3417600       |
| value_loss         | 6.9327044     |
--------------------------------------
--------------------------------------
| approxkl           | 0.001232554   |
| clipfrac           | 0.01234375    |
| explained_variance | 0.786         |
| fps                | 1164          |
| n_updates          | 1069          |
| policy_entropy     | 0.11352399    |
| policy_loss        | -0.0027773455 |
| serial_timesteps   | 3420800       |
| time_elapsed       | 3.15e+03      |
| total_timesteps    | 3420800       |
| value_loss         | 0.072195865   |
-------------------------

-------------------------------------
| approxkl           | 0.0011048871 |
| clipfrac           | 0.013828125  |
| explained_variance | -0.244       |
| fps                | 1016         |
| n_updates          | 1085         |
| policy_entropy     | 0.12890202   |
| policy_loss        | -0.002657389 |
| serial_timesteps   | 3472000      |
| time_elapsed       | 3.2e+03      |
| total_timesteps    | 3472000      |
| value_loss         | 0.015243013  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0029298132  |
| clipfrac           | 0.023984376   |
| explained_variance | -0.705        |
| fps                | 1037          |
| n_updates          | 1086          |
| policy_entropy     | 0.12911674    |
| policy_loss        | -0.0014513532 |
| serial_timesteps   | 3475200       |
| time_elapsed       | 3.2e+03       |
| total_timesteps    | 3475200       |
| value_loss         | 0.007840339   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0019058117  |
| clipfrac           | 0.021875001   |
| explained_variance | -0.802        |
| fps                | 1048          |
| n_updates          | 1102          |
| policy_entropy     | 0.095125616   |
| policy_loss        | -0.0019908636 |
| serial_timesteps   | 3526400       |
| time_elapsed       | 3.25e+03      |
| total_timesteps    | 3526400       |
| value_loss         | 0.0072437185  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0016142309  |
| clipfrac           | 0.015234375   |
| explained_variance | -0.0301       |
| fps                | 1061          |
| n_updates          | 1103          |
| policy_entropy     | 0.09540283    |
| policy_loss        | -0.0027916874 |
| serial_timesteps   | 3529600       |
| time_elapsed       | 3.25e+03      |
| total_timesteps    | 3529600       |
| value_loss         | 0.00874605    |
-------------------------

---------------------------------------
| approxkl           | 0.0007550714   |
| clipfrac           | 0.005234375    |
| explained_variance | 0.933          |
| fps                | 1172           |
| n_updates          | 1119           |
| policy_entropy     | 0.07259907     |
| policy_loss        | -0.00010852553 |
| serial_timesteps   | 3580800        |
| time_elapsed       | 3.3e+03        |
| total_timesteps    | 3580800        |
| value_loss         | 0.8121065      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0002822082  |
| clipfrac           | 0.003203125   |
| explained_variance | 0.805         |
| fps                | 1152          |
| n_updates          | 1120          |
| policy_entropy     | 0.08426702    |
| policy_loss        | -9.484127e-05 |
| serial_timesteps   | 3584000       |
| time_elapsed       | 3.3e+03       |
| total_timesteps    | 3584000       |
| value_loss         | 3.5851414     |
------------

--------------------------------------
| approxkl           | 0.0014788171  |
| clipfrac           | 0.015390625   |
| explained_variance | 0.237         |
| fps                | 1136          |
| n_updates          | 1136          |
| policy_entropy     | 0.08446808    |
| policy_loss        | -0.0010853179 |
| serial_timesteps   | 3635200       |
| time_elapsed       | 3.35e+03      |
| total_timesteps    | 3635200       |
| value_loss         | 0.018308066   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009453858  |
| clipfrac           | 0.0122656245  |
| explained_variance | 0.446         |
| fps                | 1118          |
| n_updates          | 1137          |
| policy_entropy     | 0.077187516   |
| policy_loss        | -0.0011174347 |
| serial_timesteps   | 3638400       |
| time_elapsed       | 3.35e+03      |
| total_timesteps    | 3638400       |
| value_loss         | 0.002009085   |
-------------------------

--------------------------------------
| approxkl           | 0.0009572214  |
| clipfrac           | 0.0122656245  |
| explained_variance | 0.16          |
| fps                | 1151          |
| n_updates          | 1153          |
| policy_entropy     | 0.0837984     |
| policy_loss        | -0.0011012496 |
| serial_timesteps   | 3689600       |
| time_elapsed       | 3.39e+03      |
| total_timesteps    | 3689600       |
| value_loss         | 0.003020987   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0028562837  |
| clipfrac           | 0.021171875   |
| explained_variance | -2.66         |
| fps                | 1164          |
| n_updates          | 1154          |
| policy_entropy     | 0.086314894   |
| policy_loss        | -0.0006528937 |
| serial_timesteps   | 3692800       |
| time_elapsed       | 3.4e+03       |
| total_timesteps    | 3692800       |
| value_loss         | 0.0149100525  |
-------------------------

--------------------------------------
| approxkl           | 0.0006370849  |
| clipfrac           | 0.005546875   |
| explained_variance | 0.568         |
| fps                | 1111          |
| n_updates          | 1170          |
| policy_entropy     | 0.07198739    |
| policy_loss        | -0.0008820221 |
| serial_timesteps   | 3744000       |
| time_elapsed       | 3.44e+03      |
| total_timesteps    | 3744000       |
| value_loss         | 0.0036024768  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0008157078 |
| clipfrac           | 0.00828125   |
| explained_variance | 0.326        |
| fps                | 1088         |
| n_updates          | 1171         |
| policy_entropy     | 0.08263174   |
| policy_loss        | -0.001248146 |
| serial_timesteps   | 3747200      |
| time_elapsed       | 3.45e+03     |
| total_timesteps    | 3747200      |
| value_loss         | 0.0022994643 |
-------------------------------------

--------------------------------------
| approxkl           | 0.0005618512  |
| clipfrac           | 0.006953125   |
| explained_variance | 0.135         |
| fps                | 1145          |
| n_updates          | 1187          |
| policy_entropy     | 0.07691137    |
| policy_loss        | -0.0019558105 |
| serial_timesteps   | 3798400       |
| time_elapsed       | 3.49e+03      |
| total_timesteps    | 3798400       |
| value_loss         | 0.004368477   |
--------------------------------------
--------------------------------------
| approxkl           | 0.000856597   |
| clipfrac           | 0.00984375    |
| explained_variance | 0.982         |
| fps                | 1130          |
| n_updates          | 1188          |
| policy_entropy     | 0.06680766    |
| policy_loss        | -0.0017823186 |
| serial_timesteps   | 3801600       |
| time_elapsed       | 3.5e+03       |
| total_timesteps    | 3801600       |
| value_loss         | 0.019286955   |
-------------------------

--------------------------------------
| approxkl           | 0.0031415566  |
| clipfrac           | 0.02515625    |
| explained_variance | 0.134         |
| fps                | 1053          |
| n_updates          | 1204          |
| policy_entropy     | 0.09091587    |
| policy_loss        | -0.0036364668 |
| serial_timesteps   | 3852800       |
| time_elapsed       | 3.55e+03      |
| total_timesteps    | 3852800       |
| value_loss         | 0.009639818   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0007787454   |
| clipfrac           | 0.00625        |
| explained_variance | 0.413          |
| fps                | 1087           |
| n_updates          | 1205           |
| policy_entropy     | 0.07940762     |
| policy_loss        | -0.00088940375 |
| serial_timesteps   | 3856000        |
| time_elapsed       | 3.55e+03       |
| total_timesteps    | 3856000        |
| value_loss         | 0.005994453    |
-------------

--------------------------------------
| approxkl           | 9.7878125e-05 |
| clipfrac           | 0.000859375   |
| explained_variance | 0.667         |
| fps                | 1123          |
| n_updates          | 1221          |
| policy_entropy     | 0.054689348   |
| policy_loss        | 3.0429735e-05 |
| serial_timesteps   | 3907200       |
| time_elapsed       | 3.6e+03       |
| total_timesteps    | 3907200       |
| value_loss         | 4.323496      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00050652446 |
| clipfrac           | 0.006171875   |
| explained_variance | 0.967         |
| fps                | 1144          |
| n_updates          | 1222          |
| policy_entropy     | 0.077688      |
| policy_loss        | -0.0019237127 |
| serial_timesteps   | 3910400       |
| time_elapsed       | 3.6e+03       |
| total_timesteps    | 3910400       |
| value_loss         | 0.05850268    |
-------------------------

--------------------------------------
| approxkl           | 0.0017767737  |
| clipfrac           | 0.01546875    |
| explained_variance | 0.295         |
| fps                | 1003          |
| n_updates          | 1238          |
| policy_entropy     | 0.07108643    |
| policy_loss        | -0.0014574664 |
| serial_timesteps   | 3961600       |
| time_elapsed       | 3.65e+03      |
| total_timesteps    | 3961600       |
| value_loss         | 0.0021235982  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0017479379  |
| clipfrac           | 0.018125001   |
| explained_variance | 0.42          |
| fps                | 1008          |
| n_updates          | 1239          |
| policy_entropy     | 0.083600365   |
| policy_loss        | -0.0013638083 |
| serial_timesteps   | 3964800       |
| time_elapsed       | 3.65e+03      |
| total_timesteps    | 3964800       |
| value_loss         | 0.0018408198  |
-------------------------

--------------------------------------
| approxkl           | 0.0009950824  |
| clipfrac           | 0.012734375   |
| explained_variance | 0.539         |
| fps                | 1001          |
| n_updates          | 1255          |
| policy_entropy     | 0.083132476   |
| policy_loss        | -0.0011925619 |
| serial_timesteps   | 4016000       |
| time_elapsed       | 3.7e+03       |
| total_timesteps    | 4016000       |
| value_loss         | 0.0016157203  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010633157  |
| clipfrac           | 0.011640625   |
| explained_variance | 0.523         |
| fps                | 1012          |
| n_updates          | 1256          |
| policy_entropy     | 0.06861065    |
| policy_loss        | -0.0016757827 |
| serial_timesteps   | 4019200       |
| time_elapsed       | 3.7e+03       |
| total_timesteps    | 4019200       |
| value_loss         | 0.0014064239  |
-------------------------

--------------------------------------
| approxkl           | 0.0015470673  |
| clipfrac           | 0.0121875     |
| explained_variance | 0.578         |
| fps                | 980           |
| n_updates          | 1272          |
| policy_entropy     | 0.068761505   |
| policy_loss        | -0.0003238974 |
| serial_timesteps   | 4070400       |
| time_elapsed       | 3.75e+03      |
| total_timesteps    | 4070400       |
| value_loss         | 0.0015854312  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0018852795  |
| clipfrac           | 0.01          |
| explained_variance | 0.577         |
| fps                | 1003          |
| n_updates          | 1273          |
| policy_entropy     | 0.06827913    |
| policy_loss        | -0.0012535714 |
| serial_timesteps   | 4073600       |
| time_elapsed       | 3.76e+03      |
| total_timesteps    | 4073600       |
| value_loss         | 0.018359892   |
-------------------------

--------------------------------------
| approxkl           | 0.0010094164  |
| clipfrac           | 0.00765625    |
| explained_variance | 0.0509        |
| fps                | 1082          |
| n_updates          | 1289          |
| policy_entropy     | 0.05930084    |
| policy_loss        | -0.0017767043 |
| serial_timesteps   | 4124800       |
| time_elapsed       | 3.81e+03      |
| total_timesteps    | 4124800       |
| value_loss         | 0.01181676    |
--------------------------------------
-------------------------------------
| approxkl           | 0.0007396431 |
| clipfrac           | 0.006640625  |
| explained_variance | -0.493       |
| fps                | 1139         |
| n_updates          | 1290         |
| policy_entropy     | 0.06298357   |
| policy_loss        | -0.000824241 |
| serial_timesteps   | 4128000      |
| time_elapsed       | 3.81e+03     |
| total_timesteps    | 4128000      |
| value_loss         | 0.004678248  |
-------------------------------------

--------------------------------------
| approxkl           | 0.00097386935 |
| clipfrac           | 0.009921875   |
| explained_variance | 0.253         |
| fps                | 1111          |
| n_updates          | 1306          |
| policy_entropy     | 0.07225074    |
| policy_loss        | -0.0012788293 |
| serial_timesteps   | 4179200       |
| time_elapsed       | 3.86e+03      |
| total_timesteps    | 4179200       |
| value_loss         | 0.002151721   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00061361236 |
| clipfrac           | 0.0062499996  |
| explained_variance | 0.359         |
| fps                | 1122          |
| n_updates          | 1307          |
| policy_entropy     | 0.065538526   |
| policy_loss        | -0.0021321995 |
| serial_timesteps   | 4182400       |
| time_elapsed       | 3.86e+03      |
| total_timesteps    | 4182400       |
| value_loss         | 0.0045815497  |
-------------------------

--------------------------------------
| approxkl           | 0.0015751155  |
| clipfrac           | 0.015546875   |
| explained_variance | -0.364        |
| fps                | 992           |
| n_updates          | 1323          |
| policy_entropy     | 0.066256434   |
| policy_loss        | -0.0010553125 |
| serial_timesteps   | 4233600       |
| time_elapsed       | 3.91e+03      |
| total_timesteps    | 4233600       |
| value_loss         | 0.0052691633  |
--------------------------------------
--------------------------------------
| approxkl           | 0.000880424   |
| clipfrac           | 0.009765625   |
| explained_variance | -0.0557       |
| fps                | 1014          |
| n_updates          | 1324          |
| policy_entropy     | 0.057236955   |
| policy_loss        | -0.0006514264 |
| serial_timesteps   | 4236800       |
| time_elapsed       | 3.91e+03      |
| total_timesteps    | 4236800       |
| value_loss         | 0.0038802307  |
-------------------------

---------------------------------------
| approxkl           | 0.00030814632  |
| clipfrac           | 0.0037500001   |
| explained_variance | 0.0804         |
| fps                | 1064           |
| n_updates          | 1340           |
| policy_entropy     | 0.087499216    |
| policy_loss        | -0.00056091056 |
| serial_timesteps   | 4288000        |
| time_elapsed       | 3.96e+03       |
| total_timesteps    | 4288000        |
| value_loss         | 0.01361011     |
---------------------------------------
--------------------------------------
| approxkl           | 0.0012826148  |
| clipfrac           | 0.0122656245  |
| explained_variance | -1.49         |
| fps                | 1043          |
| n_updates          | 1341          |
| policy_entropy     | 0.08293415    |
| policy_loss        | -0.0014482873 |
| serial_timesteps   | 4291200       |
| time_elapsed       | 3.97e+03      |
| total_timesteps    | 4291200       |
| value_loss         | 0.00810642    |
------------

--------------------------------------
| approxkl           | 0.00082301424 |
| clipfrac           | 0.0103124995  |
| explained_variance | 0.719         |
| fps                | 1126          |
| n_updates          | 1357          |
| policy_entropy     | 0.07188748    |
| policy_loss        | 0.00020249828 |
| serial_timesteps   | 4342400       |
| time_elapsed       | 4.01e+03      |
| total_timesteps    | 4342400       |
| value_loss         | 0.8116799     |
--------------------------------------
--------------------------------------
| approxkl           | 0.001786366   |
| clipfrac           | 0.009765625   |
| explained_variance | 0.396         |
| fps                | 1061          |
| n_updates          | 1358          |
| policy_entropy     | 0.06785088    |
| policy_loss        | -0.0014712703 |
| serial_timesteps   | 4345600       |
| time_elapsed       | 4.02e+03      |
| total_timesteps    | 4345600       |
| value_loss         | 0.0026044578  |
-------------------------

--------------------------------------
| approxkl           | 0.00048854534 |
| clipfrac           | 0.0066406247  |
| explained_variance | 0.376         |
| fps                | 1004          |
| n_updates          | 1374          |
| policy_entropy     | 0.0706972     |
| policy_loss        | -0.0011948356 |
| serial_timesteps   | 4396800       |
| time_elapsed       | 4.07e+03      |
| total_timesteps    | 4396800       |
| value_loss         | 0.0023352643  |
--------------------------------------
--------------------------------------
| approxkl           | 0.002193803   |
| clipfrac           | 0.017578125   |
| explained_variance | 0.382         |
| fps                | 1025          |
| n_updates          | 1375          |
| policy_entropy     | 0.07039445    |
| policy_loss        | -0.0024279333 |
| serial_timesteps   | 4400000       |
| time_elapsed       | 4.07e+03      |
| total_timesteps    | 4400000       |
| value_loss         | 0.0022122369  |
-------------------------

--------------------------------------
| approxkl           | 0.0012160597  |
| clipfrac           | 0.01328125    |
| explained_variance | 0.568         |
| fps                | 1031          |
| n_updates          | 1391          |
| policy_entropy     | 0.07118183    |
| policy_loss        | -0.0017170054 |
| serial_timesteps   | 4451200       |
| time_elapsed       | 4.12e+03      |
| total_timesteps    | 4451200       |
| value_loss         | 0.0014569054  |
--------------------------------------
--------------------------------------
| approxkl           | 0.000925344   |
| clipfrac           | 0.00875       |
| explained_variance | 0.531         |
| fps                | 1072          |
| n_updates          | 1392          |
| policy_entropy     | 0.059756286   |
| policy_loss        | -0.0012103082 |
| serial_timesteps   | 4454400       |
| time_elapsed       | 4.12e+03      |
| total_timesteps    | 4454400       |
| value_loss         | 0.0013778637  |
-------------------------

--------------------------------------
| approxkl           | 0.000880233   |
| clipfrac           | 0.010624999   |
| explained_variance | 0.0295        |
| fps                | 1001          |
| n_updates          | 1408          |
| policy_entropy     | 0.0657145     |
| policy_loss        | -0.0015063398 |
| serial_timesteps   | 4505600       |
| time_elapsed       | 4.17e+03      |
| total_timesteps    | 4505600       |
| value_loss         | 0.0038108034  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0020064916  |
| clipfrac           | 0.01734375    |
| explained_variance | 0.397         |
| fps                | 1005          |
| n_updates          | 1409          |
| policy_entropy     | 0.065427005   |
| policy_loss        | -0.0012482838 |
| serial_timesteps   | 4508800       |
| time_elapsed       | 4.17e+03      |
| total_timesteps    | 4508800       |
| value_loss         | 0.0023107347  |
-------------------------

--------------------------------------
| approxkl           | 0.0016943807  |
| clipfrac           | 0.0178125     |
| explained_variance | 0.0745        |
| fps                | 1093          |
| n_updates          | 1425          |
| policy_entropy     | 0.06728465    |
| policy_loss        | -0.0008962916 |
| serial_timesteps   | 4560000       |
| time_elapsed       | 4.22e+03      |
| total_timesteps    | 4560000       |
| value_loss         | 0.0035579682  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0010242885 |
| clipfrac           | 0.009921875  |
| explained_variance | 0.389        |
| fps                | 1127         |
| n_updates          | 1426         |
| policy_entropy     | 0.06320217   |
| policy_loss        | -0.001439495 |
| serial_timesteps   | 4563200      |
| time_elapsed       | 4.22e+03     |
| total_timesteps    | 4563200      |
| value_loss         | 0.0028173402 |
-------------------------------------

--------------------------------------
| approxkl           | 0.0012635246  |
| clipfrac           | 0.01734375    |
| explained_variance | 0.101         |
| fps                | 972           |
| n_updates          | 1442          |
| policy_entropy     | 0.061359685   |
| policy_loss        | 1.5892725e-05 |
| serial_timesteps   | 4614400       |
| time_elapsed       | 4.27e+03      |
| total_timesteps    | 4614400       |
| value_loss         | 0.014430612   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00050876854 |
| clipfrac           | 0.0056249998  |
| explained_variance | -1.41         |
| fps                | 993           |
| n_updates          | 1443          |
| policy_entropy     | 0.058118712   |
| policy_loss        | -0.001088312  |
| serial_timesteps   | 4617600       |
| time_elapsed       | 4.27e+03      |
| total_timesteps    | 4617600       |
| value_loss         | 0.007762052   |
-------------------------

--------------------------------------
| approxkl           | 0.00074133277 |
| clipfrac           | 0.009453125   |
| explained_variance | 0.247         |
| fps                | 1093          |
| n_updates          | 1459          |
| policy_entropy     | 0.062320597   |
| policy_loss        | -0.0013191749 |
| serial_timesteps   | 4668800       |
| time_elapsed       | 4.32e+03      |
| total_timesteps    | 4668800       |
| value_loss         | 0.0027383459  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012351344  |
| clipfrac           | 0.0115625     |
| explained_variance | 0.673         |
| fps                | 1121          |
| n_updates          | 1460          |
| policy_entropy     | 0.07207266    |
| policy_loss        | -0.0017767947 |
| serial_timesteps   | 4672000       |
| time_elapsed       | 4.33e+03      |
| total_timesteps    | 4672000       |
| value_loss         | 0.0034632566  |
-------------------------

--------------------------------------
| approxkl           | 0.00061634346 |
| clipfrac           | 0.00640625    |
| explained_variance | 0.487         |
| fps                | 1001          |
| n_updates          | 1476          |
| policy_entropy     | 0.058185123   |
| policy_loss        | -0.0010259456 |
| serial_timesteps   | 4723200       |
| time_elapsed       | 4.37e+03      |
| total_timesteps    | 4723200       |
| value_loss         | 0.0014751721  |
--------------------------------------
---------------------------------------
| approxkl           | 0.001097578    |
| clipfrac           | 0.01171875     |
| explained_variance | 0.492          |
| fps                | 1060           |
| n_updates          | 1477           |
| policy_entropy     | 0.058503184    |
| policy_loss        | -0.00073674525 |
| serial_timesteps   | 4726400        |
| time_elapsed       | 4.38e+03       |
| total_timesteps    | 4726400        |
| value_loss         | 0.0015478389   |
-------------

--------------------------------------
| approxkl           | 0.0007728118  |
| clipfrac           | 0.00984375    |
| explained_variance | 0.542         |
| fps                | 912           |
| n_updates          | 1493          |
| policy_entropy     | 0.061881535   |
| policy_loss        | -0.0009722217 |
| serial_timesteps   | 4777600       |
| time_elapsed       | 4.43e+03      |
| total_timesteps    | 4777600       |
| value_loss         | 0.0016064362  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0010818639   |
| clipfrac           | 0.01203125     |
| explained_variance | 0.623          |
| fps                | 912            |
| n_updates          | 1494           |
| policy_entropy     | 0.060824856    |
| policy_loss        | -0.00020238187 |
| serial_timesteps   | 4780800        |
| time_elapsed       | 4.43e+03       |
| total_timesteps    | 4780800        |
| value_loss         | 0.001399658    |
-------------

--------------------------------------
| approxkl           | 0.0005846745  |
| clipfrac           | 0.005078125   |
| explained_variance | 0.527         |
| fps                | 951           |
| n_updates          | 1510          |
| policy_entropy     | 0.05783622    |
| policy_loss        | -0.0022833818 |
| serial_timesteps   | 4832000       |
| time_elapsed       | 4.49e+03      |
| total_timesteps    | 4832000       |
| value_loss         | 0.007938063   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00041265017 |
| clipfrac           | 0.005         |
| explained_variance | 0.538         |
| fps                | 968           |
| n_updates          | 1511          |
| policy_entropy     | 0.052689232   |
| policy_loss        | -0.0014034878 |
| serial_timesteps   | 4835200       |
| time_elapsed       | 4.49e+03      |
| total_timesteps    | 4835200       |
| value_loss         | 0.0036365464  |
-------------------------

--------------------------------------
| approxkl           | 0.0009136405  |
| clipfrac           | 0.007421875   |
| explained_variance | 0.497         |
| fps                | 926           |
| n_updates          | 1527          |
| policy_entropy     | 0.05541466    |
| policy_loss        | -0.0013614912 |
| serial_timesteps   | 4886400       |
| time_elapsed       | 4.54e+03      |
| total_timesteps    | 4886400       |
| value_loss         | 0.0017141802  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00063489017 |
| clipfrac           | 0.005859375   |
| explained_variance | 0.58          |
| fps                | 964           |
| n_updates          | 1528          |
| policy_entropy     | 0.06570767    |
| policy_loss        | -0.0010485931 |
| serial_timesteps   | 4889600       |
| time_elapsed       | 4.55e+03      |
| total_timesteps    | 4889600       |
| value_loss         | 0.0014684252  |
-------------------------

--------------------------------------
| approxkl           | 0.00076337066 |
| clipfrac           | 0.00953125    |
| explained_variance | 0.582         |
| fps                | 1016          |
| n_updates          | 1544          |
| policy_entropy     | 0.052149568   |
| policy_loss        | -0.001023986  |
| serial_timesteps   | 4940800       |
| time_elapsed       | 4.6e+03       |
| total_timesteps    | 4940800       |
| value_loss         | 0.0019041802  |
--------------------------------------
---------------------------------------
| approxkl           | 0.000390436    |
| clipfrac           | 0.0037500001   |
| explained_variance | 0.408          |
| fps                | 965            |
| n_updates          | 1545           |
| policy_entropy     | 0.052502416    |
| policy_loss        | -0.00090688886 |
| serial_timesteps   | 4944000        |
| time_elapsed       | 4.6e+03        |
| total_timesteps    | 4944000        |
| value_loss         | 0.001714992    |
-------------

--------------------------------------
| approxkl           | 0.0009778237  |
| clipfrac           | 0.00671875    |
| explained_variance | 0.616         |
| fps                | 953           |
| n_updates          | 1561          |
| policy_entropy     | 0.06723802    |
| policy_loss        | -0.0011924133 |
| serial_timesteps   | 4995200       |
| time_elapsed       | 4.66e+03      |
| total_timesteps    | 4995200       |
| value_loss         | 0.0012612448  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0013464325  |
| clipfrac           | 0.012578124   |
| explained_variance | 0.594         |
| fps                | 963           |
| n_updates          | 1562          |
| policy_entropy     | 0.06887908    |
| policy_loss        | -0.0013013791 |
| serial_timesteps   | 4998400       |
| time_elapsed       | 4.66e+03      |
| total_timesteps    | 4998400       |
| value_loss         | 0.0013350414  |
-------------------------

## Testing lesson4

In [68]:
# model_lesson4 = PPO2.load(model_names[4])
test(env_lesson4, model_lesson4, render = False)

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Episode 20 finished
Episode 21 finished
Episode 22 finished
Episode 23 finished
Episode 24 finished
Episode 25 finished
Episode 26 finished
Episode 27 finished
Episode 28 finished
Episode 29 finished
Episode 30 finished
Episode 31 finished
Episode 32 finished
Episode 33 finished
Episode 34 finished
Episode 35 finished
Episode 36 finished
Episode 37 finished
Episode 38 finished
Episode 39 finished
Episode 40 finished
Episode 41 finished
Episode 42 finished
Episode 43 finished
Episode 44 finished
Episode 45 finished
Episode 46 finished
Episode 47 finished
Episode 48 finished
Episode 49 finished
Episode 50

## Training lesson 5
### 11x11 grid with 64 wooden boxes

In [69]:
config_lesson5 = wood_box_lesson5_env()
env_lesson5 = initialize_env(config_lesson5)

In [70]:
# model_lesson4 = PPO2.load(load_path = model_names[4],
#                           tensorboard_log = "./ppo2_pommerman_box_collect_tensorboard/")
model_lesson5 = train(model_name = model_names[5], 
                      model = model_lesson4,
                      env = env_lesson5, 
                      n_steps = n_steps, 
                      total_timesteps = total_timestep)


--------------------------------------
| approxkl           | 0.0009848711  |
| clipfrac           | 0.009921875   |
| explained_variance | 0.68          |
| fps                | 898           |
| n_updates          | 1             |
| policy_entropy     | 0.075007915   |
| policy_loss        | -0.0009413279 |
| serial_timesteps   | 3200          |
| time_elapsed       | 3.1e-06       |
| total_timesteps    | 3200          |
| value_loss         | 0.0010606942  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010833209  |
| clipfrac           | 0.01203125    |
| explained_variance | 0.657         |
| fps                | 867           |
| n_updates          | 2             |
| policy_entropy     | 0.0730928     |
| policy_loss        | -0.0014716615 |
| serial_timesteps   | 6400          |
| time_elapsed       | 3.57          |
| total_timesteps    | 6400          |
| value_loss         | 0.0010748311  |
-------------------------

--------------------------------------
| approxkl           | 0.0011965457  |
| clipfrac           | 0.011015625   |
| explained_variance | 0.344         |
| fps                | 933           |
| n_updates          | 18            |
| policy_entropy     | 0.07257196    |
| policy_loss        | -0.0023744907 |
| serial_timesteps   | 57600         |
| time_elapsed       | 58.5          |
| total_timesteps    | 57600         |
| value_loss         | 0.0017274539  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00316277    |
| clipfrac           | 0.026562499   |
| explained_variance | 0.591         |
| fps                | 939           |
| n_updates          | 19            |
| policy_entropy     | 0.07365044    |
| policy_loss        | -0.0010087532 |
| serial_timesteps   | 60800         |
| time_elapsed       | 62            |
| total_timesteps    | 60800         |
| value_loss         | 0.005554967   |
-------------------------

--------------------------------------
| approxkl           | 0.0006782301  |
| clipfrac           | 0.0065625003  |
| explained_variance | 0.567         |
| fps                | 915           |
| n_updates          | 35            |
| policy_entropy     | 0.07740382    |
| policy_loss        | -0.0011141188 |
| serial_timesteps   | 112000        |
| time_elapsed       | 119           |
| total_timesteps    | 112000        |
| value_loss         | 0.0014454821  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0011731493  |
| clipfrac           | 0.013203125   |
| explained_variance | 0.0675        |
| fps                | 945           |
| n_updates          | 36            |
| policy_entropy     | 0.08587828    |
| policy_loss        | -0.0016519896 |
| serial_timesteps   | 115200        |
| time_elapsed       | 122           |
| total_timesteps    | 115200        |
| value_loss         | 0.00300695    |
-------------------------

--------------------------------------
| approxkl           | 0.00046118506 |
| clipfrac           | 0.005390625   |
| explained_variance | 0.423         |
| fps                | 952           |
| n_updates          | 52            |
| policy_entropy     | 0.07415887    |
| policy_loss        | -0.0009648249 |
| serial_timesteps   | 166400        |
| time_elapsed       | 178           |
| total_timesteps    | 166400        |
| value_loss         | 0.001977024   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00051443774 |
| clipfrac           | 0.00578125    |
| explained_variance | 0.424         |
| fps                | 964           |
| n_updates          | 53            |
| policy_entropy     | 0.07077193    |
| policy_loss        | -0.0011574766 |
| serial_timesteps   | 169600        |
| time_elapsed       | 181           |
| total_timesteps    | 169600        |
| value_loss         | 0.0016949459  |
-------------------------

--------------------------------------
| approxkl           | 0.0023930995  |
| clipfrac           | 0.024296876   |
| explained_variance | 0.54          |
| fps                | 959           |
| n_updates          | 69            |
| policy_entropy     | 0.072768256   |
| policy_loss        | -0.0007463481 |
| serial_timesteps   | 220800        |
| time_elapsed       | 234           |
| total_timesteps    | 220800        |
| value_loss         | 0.001512497   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009605742  |
| clipfrac           | 0.013046876   |
| explained_variance | 0.698         |
| fps                | 929           |
| n_updates          | 70            |
| policy_entropy     | 0.11086406    |
| policy_loss        | 0.00065442035 |
| serial_timesteps   | 224000        |
| time_elapsed       | 237           |
| total_timesteps    | 224000        |
| value_loss         | 5.5939546     |
-------------------------

---------------------------------------
| approxkl           | 0.0018755073   |
| clipfrac           | 0.02           |
| explained_variance | 0.185          |
| fps                | 1022           |
| n_updates          | 86             |
| policy_entropy     | 0.07808457     |
| policy_loss        | -0.00076009403 |
| serial_timesteps   | 275200         |
| time_elapsed       | 286            |
| total_timesteps    | 275200         |
| value_loss         | 0.0083987145   |
---------------------------------------
---------------------------------------
| approxkl           | 0.0021391795   |
| clipfrac           | 0.02328125     |
| explained_variance | 0.403          |
| fps                | 1019           |
| n_updates          | 87             |
| policy_entropy     | 0.07978827     |
| policy_loss        | -0.00073938817 |
| serial_timesteps   | 278400         |
| time_elapsed       | 289            |
| total_timesteps    | 278400         |
| value_loss         | 0.002234662    |


--------------------------------------
| approxkl           | 0.00038543448 |
| clipfrac           | 0.005078125   |
| explained_variance | 0.641         |
| fps                | 1008          |
| n_updates          | 103           |
| policy_entropy     | 0.060506016   |
| policy_loss        | -0.0006993526 |
| serial_timesteps   | 329600        |
| time_elapsed       | 339           |
| total_timesteps    | 329600        |
| value_loss         | 0.001092286   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015576445  |
| clipfrac           | 0.014453124   |
| explained_variance | 0.582         |
| fps                | 1047          |
| n_updates          | 104           |
| policy_entropy     | 0.06416351    |
| policy_loss        | -0.0020715129 |
| serial_timesteps   | 332800        |
| time_elapsed       | 343           |
| total_timesteps    | 332800        |
| value_loss         | 0.007050385   |
-------------------------

--------------------------------------
| approxkl           | 0.0025430406  |
| clipfrac           | 0.022109374   |
| explained_variance | 0.249         |
| fps                | 968           |
| n_updates          | 120           |
| policy_entropy     | 0.08853668    |
| policy_loss        | -0.0007889253 |
| serial_timesteps   | 384000        |
| time_elapsed       | 393           |
| total_timesteps    | 384000        |
| value_loss         | 0.0053823483  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0013980693  |
| clipfrac           | 0.018828126   |
| explained_variance | 0.595         |
| fps                | 952           |
| n_updates          | 121           |
| policy_entropy     | 0.07556951    |
| policy_loss        | -0.0008872889 |
| serial_timesteps   | 387200        |
| time_elapsed       | 396           |
| total_timesteps    | 387200        |
| value_loss         | 0.0013180674  |
-------------------------

--------------------------------------
| approxkl           | 0.000501666   |
| clipfrac           | 0.00546875    |
| explained_variance | 0.775         |
| fps                | 1119          |
| n_updates          | 137           |
| policy_entropy     | 0.056133047   |
| policy_loss        | -0.0019955593 |
| serial_timesteps   | 438400        |
| time_elapsed       | 445           |
| total_timesteps    | 438400        |
| value_loss         | 0.0026840544  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00070904335 |
| clipfrac           | 0.0082812505  |
| explained_variance | 0.613         |
| fps                | 1105          |
| n_updates          | 138           |
| policy_entropy     | 0.063370235   |
| policy_loss        | -0.0008717229 |
| serial_timesteps   | 441600        |
| time_elapsed       | 448           |
| total_timesteps    | 441600        |
| value_loss         | 0.0012325168  |
-------------------------

---------------------------------------
| approxkl           | 0.00057563814  |
| clipfrac           | 0.006015625    |
| explained_variance | 0.112          |
| fps                | 1027           |
| n_updates          | 154            |
| policy_entropy     | 0.051458895    |
| policy_loss        | -0.00045323162 |
| serial_timesteps   | 492800         |
| time_elapsed       | 499            |
| total_timesteps    | 492800         |
| value_loss         | 0.0026592033   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0011967653  |
| clipfrac           | 0.0128125     |
| explained_variance | 0.169         |
| fps                | 1047          |
| n_updates          | 155           |
| policy_entropy     | 0.051098555   |
| policy_loss        | -0.0011011527 |
| serial_timesteps   | 496000        |
| time_elapsed       | 502           |
| total_timesteps    | 496000        |
| value_loss         | 0.0024334511  |
------------

--------------------------------------
| approxkl           | 0.001641746   |
| clipfrac           | 0.013515625   |
| explained_variance | -0.088        |
| fps                | 1030          |
| n_updates          | 171           |
| policy_entropy     | 0.05056712    |
| policy_loss        | -0.0006769316 |
| serial_timesteps   | 547200        |
| time_elapsed       | 553           |
| total_timesteps    | 547200        |
| value_loss         | 0.0027507986  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00090430304 |
| clipfrac           | 0.00578125    |
| explained_variance | 0.884         |
| fps                | 1094          |
| n_updates          | 172           |
| policy_entropy     | 0.048184767   |
| policy_loss        | -0.000981177  |
| serial_timesteps   | 550400        |
| time_elapsed       | 557           |
| total_timesteps    | 550400        |
| value_loss         | 0.012443158   |
-------------------------

--------------------------------------
| approxkl           | 0.00039159541 |
| clipfrac           | 0.004140625   |
| explained_variance | 0.378         |
| fps                | 1089          |
| n_updates          | 188           |
| policy_entropy     | 0.05374439    |
| policy_loss        | -0.0011842137 |
| serial_timesteps   | 601600        |
| time_elapsed       | 603           |
| total_timesteps    | 601600        |
| value_loss         | 0.0019554286  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012358858  |
| clipfrac           | 0.007421875   |
| explained_variance | 0.363         |
| fps                | 1110          |
| n_updates          | 189           |
| policy_entropy     | 0.05356978    |
| policy_loss        | -0.0013841942 |
| serial_timesteps   | 604800        |
| time_elapsed       | 606           |
| total_timesteps    | 604800        |
| value_loss         | 0.0053491388  |
-------------------------

--------------------------------------
| approxkl           | 0.00150773    |
| clipfrac           | 0.01140625    |
| explained_variance | 0.222         |
| fps                | 1103          |
| n_updates          | 205           |
| policy_entropy     | 0.057327252   |
| policy_loss        | -0.0013079605 |
| serial_timesteps   | 656000        |
| time_elapsed       | 655           |
| total_timesteps    | 656000        |
| value_loss         | 0.005385125   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00026094573 |
| clipfrac           | 0.00234375    |
| explained_variance | 0.647         |
| fps                | 1076          |
| n_updates          | 206           |
| policy_entropy     | 0.055358734   |
| policy_loss        | -0.0010094036 |
| serial_timesteps   | 659200        |
| time_elapsed       | 658           |
| total_timesteps    | 659200        |
| value_loss         | 0.0036403788  |
-------------------------

--------------------------------------
| approxkl           | 0.00048013107 |
| clipfrac           | 0.005703125   |
| explained_variance | 0.646         |
| fps                | 992           |
| n_updates          | 222           |
| policy_entropy     | 0.052421197   |
| policy_loss        | -0.0008023919 |
| serial_timesteps   | 710400        |
| time_elapsed       | 708           |
| total_timesteps    | 710400        |
| value_loss         | 0.0010087085  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00061413634 |
| clipfrac           | 0.0054687504  |
| explained_variance | 0.659         |
| fps                | 968           |
| n_updates          | 223           |
| policy_entropy     | 0.047058433   |
| policy_loss        | -0.0006347482 |
| serial_timesteps   | 713600        |
| time_elapsed       | 711           |
| total_timesteps    | 713600        |
| value_loss         | 0.0009874423  |
-------------------------

--------------------------------------
| approxkl           | 0.00041830787 |
| clipfrac           | 0.0032812501  |
| explained_variance | 0.638         |
| fps                | 1094          |
| n_updates          | 239           |
| policy_entropy     | 0.04950433    |
| policy_loss        | -0.001247493  |
| serial_timesteps   | 764800        |
| time_elapsed       | 760           |
| total_timesteps    | 764800        |
| value_loss         | 0.0011050876  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0005810864   |
| clipfrac           | 0.008046875    |
| explained_variance | 0.69           |
| fps                | 1097           |
| n_updates          | 240            |
| policy_entropy     | 0.04875004     |
| policy_loss        | -0.00043851786 |
| serial_timesteps   | 768000         |
| time_elapsed       | 762            |
| total_timesteps    | 768000         |
| value_loss         | 0.0008861872   |
-------------

--------------------------------------
| approxkl           | 0.0011895394  |
| clipfrac           | 0.0103124995  |
| explained_variance | 0.509         |
| fps                | 1131          |
| n_updates          | 256           |
| policy_entropy     | 0.04826375    |
| policy_loss        | -0.0016857076 |
| serial_timesteps   | 819200        |
| time_elapsed       | 810           |
| total_timesteps    | 819200        |
| value_loss         | 0.0015571024  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012196191  |
| clipfrac           | 0.009375      |
| explained_variance | 0.266         |
| fps                | 587           |
| n_updates          | 257           |
| policy_entropy     | 0.059833538   |
| policy_loss        | -0.0019912694 |
| serial_timesteps   | 822400        |
| time_elapsed       | 813           |
| total_timesteps    | 822400        |
| value_loss         | 0.0050134584  |
-------------------------

---------------------------------------
| approxkl           | 0.0024663678   |
| clipfrac           | 0.016640624    |
| explained_variance | 0.646          |
| fps                | 1083           |
| n_updates          | 273            |
| policy_entropy     | 0.056665372    |
| policy_loss        | -0.00071213994 |
| serial_timesteps   | 873600         |
| time_elapsed       | 863            |
| total_timesteps    | 873600         |
| value_loss         | 0.0010890882   |
---------------------------------------
---------------------------------------
| approxkl           | 0.0004960831   |
| clipfrac           | 0.005234375    |
| explained_variance | 0.659          |
| fps                | 1100           |
| n_updates          | 274            |
| policy_entropy     | 0.042849243    |
| policy_loss        | -0.00082433224 |
| serial_timesteps   | 876800         |
| time_elapsed       | 866            |
| total_timesteps    | 876800         |
| value_loss         | 0.0008773838   |


--------------------------------------
| approxkl           | 0.0013327844  |
| clipfrac           | 0.012968751   |
| explained_variance | 0.661         |
| fps                | 1074          |
| n_updates          | 290           |
| policy_entropy     | 0.06578144    |
| policy_loss        | -0.0012326482 |
| serial_timesteps   | 928000        |
| time_elapsed       | 914           |
| total_timesteps    | 928000        |
| value_loss         | 0.001081804   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00087375345 |
| clipfrac           | 0.009765625   |
| explained_variance | 0.677         |
| fps                | 1098          |
| n_updates          | 291           |
| policy_entropy     | 0.06360487    |
| policy_loss        | -0.0007751569 |
| serial_timesteps   | 931200        |
| time_elapsed       | 917           |
| total_timesteps    | 931200        |
| value_loss         | 0.0010355441  |
-------------------------

--------------------------------------
| approxkl           | 0.0012953605  |
| clipfrac           | 0.010703125   |
| explained_variance | 0.686         |
| fps                | 1045          |
| n_updates          | 307           |
| policy_entropy     | 0.069639236   |
| policy_loss        | -0.0014431074 |
| serial_timesteps   | 982400        |
| time_elapsed       | 965           |
| total_timesteps    | 982400        |
| value_loss         | 0.0008896393  |
--------------------------------------
--------------------------------------
| approxkl           | 0.001132784   |
| clipfrac           | 0.01125       |
| explained_variance | 0.621         |
| fps                | 1065          |
| n_updates          | 308           |
| policy_entropy     | 0.073922336   |
| policy_loss        | -0.0013799279 |
| serial_timesteps   | 985600        |
| time_elapsed       | 968           |
| total_timesteps    | 985600        |
| value_loss         | 0.0010342496  |
-------------------------

-------------------------------------
| approxkl           | 0.0010375683 |
| clipfrac           | 0.0115625    |
| explained_variance | 0.711        |
| fps                | 1096         |
| n_updates          | 324          |
| policy_entropy     | 0.06373245   |
| policy_loss        | -0.001459247 |
| serial_timesteps   | 1036800      |
| time_elapsed       | 1.02e+03     |
| total_timesteps    | 1036800      |
| value_loss         | 0.0008677129 |
-------------------------------------
---------------------------------------
| approxkl           | 0.0009861601   |
| clipfrac           | 0.00921875     |
| explained_variance | 0.718          |
| fps                | 1117           |
| n_updates          | 325            |
| policy_entropy     | 0.0594759      |
| policy_loss        | -0.00078826497 |
| serial_timesteps   | 1040000        |
| time_elapsed       | 1.02e+03       |
| total_timesteps    | 1040000        |
| value_loss         | 0.00255547     |
--------------------------

--------------------------------------
| approxkl           | 0.0015916582  |
| clipfrac           | 0.00890625    |
| explained_variance | 0.697         |
| fps                | 1082          |
| n_updates          | 341           |
| policy_entropy     | 0.047476277   |
| policy_loss        | -0.0012860232 |
| serial_timesteps   | 1091200       |
| time_elapsed       | 1.07e+03      |
| total_timesteps    | 1091200       |
| value_loss         | 0.0008451882  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0009273633   |
| clipfrac           | 0.009375       |
| explained_variance | 0.712          |
| fps                | 1121           |
| n_updates          | 342            |
| policy_entropy     | 0.048249688    |
| policy_loss        | -0.00083778595 |
| serial_timesteps   | 1094400        |
| time_elapsed       | 1.08e+03       |
| total_timesteps    | 1094400        |
| value_loss         | 0.0008494057   |
-------------

--------------------------------------
| approxkl           | 0.0009825523  |
| clipfrac           | 0.008593749   |
| explained_variance | 0.712         |
| fps                | 1114          |
| n_updates          | 358           |
| policy_entropy     | 0.049114104   |
| policy_loss        | -0.0013342358 |
| serial_timesteps   | 1145600       |
| time_elapsed       | 1.12e+03      |
| total_timesteps    | 1145600       |
| value_loss         | 0.00091366254 |
--------------------------------------
--------------------------------------
| approxkl           | 0.00056249293 |
| clipfrac           | 0.006796875   |
| explained_variance | 0.72          |
| fps                | 1103          |
| n_updates          | 359           |
| policy_entropy     | 0.05404289    |
| policy_loss        | -0.0010413345 |
| serial_timesteps   | 1148800       |
| time_elapsed       | 1.12e+03      |
| total_timesteps    | 1148800       |
| value_loss         | 0.0007218178  |
-------------------------

--------------------------------------
| approxkl           | 0.0010629578  |
| clipfrac           | 0.00859375    |
| explained_variance | 0.712         |
| fps                | 1084          |
| n_updates          | 375           |
| policy_entropy     | 0.05019117    |
| policy_loss        | -0.0013483296 |
| serial_timesteps   | 1200000       |
| time_elapsed       | 1.17e+03      |
| total_timesteps    | 1200000       |
| value_loss         | 0.00085764815 |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006858342  |
| clipfrac           | 0.0073437504  |
| explained_variance | 0.701         |
| fps                | 1095          |
| n_updates          | 376           |
| policy_entropy     | 0.051579423   |
| policy_loss        | -0.0011045681 |
| serial_timesteps   | 1203200       |
| time_elapsed       | 1.17e+03      |
| total_timesteps    | 1203200       |
| value_loss         | 0.0008166741  |
-------------------------

---------------------------------------
| approxkl           | 0.00065525644  |
| clipfrac           | 0.0073437504   |
| explained_variance | 0.681          |
| fps                | 1117           |
| n_updates          | 392            |
| policy_entropy     | 0.070541345    |
| policy_loss        | -0.00042872922 |
| serial_timesteps   | 1254400        |
| time_elapsed       | 1.22e+03       |
| total_timesteps    | 1254400        |
| value_loss         | 0.0009115022   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0014866244  |
| clipfrac           | 0.01515625    |
| explained_variance | 0.311         |
| fps                | 1107          |
| n_updates          | 393           |
| policy_entropy     | 0.066669956   |
| policy_loss        | -0.0029710107 |
| serial_timesteps   | 1257600       |
| time_elapsed       | 1.22e+03      |
| total_timesteps    | 1257600       |
| value_loss         | 0.004899718   |
------------

--------------------------------------
| approxkl           | 0.00094160665 |
| clipfrac           | 0.01046875    |
| explained_variance | -0.181        |
| fps                | 1110          |
| n_updates          | 409           |
| policy_entropy     | 0.050970886   |
| policy_loss        | -0.0009847688 |
| serial_timesteps   | 1308800       |
| time_elapsed       | 1.27e+03      |
| total_timesteps    | 1308800       |
| value_loss         | 0.0031101329  |
--------------------------------------
---------------------------------------
| approxkl           | 0.000351795    |
| clipfrac           | 0.00390625     |
| explained_variance | -0.159         |
| fps                | 1082           |
| n_updates          | 410            |
| policy_entropy     | 0.04077165     |
| policy_loss        | -0.00033889906 |
| serial_timesteps   | 1312000        |
| time_elapsed       | 1.27e+03       |
| total_timesteps    | 1312000        |
| value_loss         | 0.002458535    |
-------------

--------------------------------------
| approxkl           | 0.0007060056  |
| clipfrac           | 0.00734375    |
| explained_variance | 0.699         |
| fps                | 1118          |
| n_updates          | 426           |
| policy_entropy     | 0.047277298   |
| policy_loss        | -0.0013506672 |
| serial_timesteps   | 1363200       |
| time_elapsed       | 1.32e+03      |
| total_timesteps    | 1363200       |
| value_loss         | 0.0008657082  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0009874674 |
| clipfrac           | 0.009765625  |
| explained_variance | 0.696        |
| fps                | 1121         |
| n_updates          | 427          |
| policy_entropy     | 0.049628336  |
| policy_loss        | -0.001438494 |
| serial_timesteps   | 1366400      |
| time_elapsed       | 1.32e+03     |
| total_timesteps    | 1366400      |
| value_loss         | 0.0029131006 |
-------------------------------------

--------------------------------------
| approxkl           | 0.00046949816 |
| clipfrac           | 0.005390625   |
| explained_variance | 0.376         |
| fps                | 952           |
| n_updates          | 443           |
| policy_entropy     | 0.05921501    |
| policy_loss        | -0.0009777537 |
| serial_timesteps   | 1417600       |
| time_elapsed       | 1.37e+03      |
| total_timesteps    | 1417600       |
| value_loss         | 0.003986769   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0003416908   |
| clipfrac           | 0.003828125    |
| explained_variance | 0.0774         |
| fps                | 971            |
| n_updates          | 444            |
| policy_entropy     | 0.06199586     |
| policy_loss        | -0.00035671904 |
| serial_timesteps   | 1420800        |
| time_elapsed       | 1.37e+03       |
| total_timesteps    | 1420800        |
| value_loss         | 0.0027092176   |
-------------

--------------------------------------
| approxkl           | 0.0019977808  |
| clipfrac           | 0.01578125    |
| explained_variance | 0.642         |
| fps                | 1015          |
| n_updates          | 460           |
| policy_entropy     | 0.056936532   |
| policy_loss        | -0.0014268378 |
| serial_timesteps   | 1472000       |
| time_elapsed       | 1.43e+03      |
| total_timesteps    | 1472000       |
| value_loss         | 0.0009260179  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0004098283  |
| clipfrac           | 0.0045312503  |
| explained_variance | 0.668         |
| fps                | 1040          |
| n_updates          | 461           |
| policy_entropy     | 0.054788597   |
| policy_loss        | -0.0012595606 |
| serial_timesteps   | 1475200       |
| time_elapsed       | 1.43e+03      |
| total_timesteps    | 1475200       |
| value_loss         | 0.00091170205 |
-------------------------

--------------------------------------
| approxkl           | 0.0011017245  |
| clipfrac           | 0.011953125   |
| explained_variance | 0.579         |
| fps                | 1020          |
| n_updates          | 477           |
| policy_entropy     | 0.06265455    |
| policy_loss        | -0.0012627621 |
| serial_timesteps   | 1526400       |
| time_elapsed       | 1.48e+03      |
| total_timesteps    | 1526400       |
| value_loss         | 0.0012091838  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0022779694  |
| clipfrac           | 0.01796875    |
| explained_variance | 0.552         |
| fps                | 1030          |
| n_updates          | 478           |
| policy_entropy     | 0.060403142   |
| policy_loss        | -0.0014992072 |
| serial_timesteps   | 1529600       |
| time_elapsed       | 1.48e+03      |
| total_timesteps    | 1529600       |
| value_loss         | 0.0011858459  |
-------------------------

--------------------------------------
| approxkl           | 0.0010084794  |
| clipfrac           | 0.0075000003  |
| explained_variance | 0.325         |
| fps                | 1054          |
| n_updates          | 494           |
| policy_entropy     | 0.05156417    |
| policy_loss        | -0.0015254125 |
| serial_timesteps   | 1580800       |
| time_elapsed       | 1.53e+03      |
| total_timesteps    | 1580800       |
| value_loss         | 0.0019486307  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0017903484  |
| clipfrac           | 0.012890625   |
| explained_variance | 0.392         |
| fps                | 1024          |
| n_updates          | 495           |
| policy_entropy     | 0.049724694   |
| policy_loss        | -0.0015193693 |
| serial_timesteps   | 1584000       |
| time_elapsed       | 1.53e+03      |
| total_timesteps    | 1584000       |
| value_loss         | 0.0019113326  |
-------------------------

--------------------------------------
| approxkl           | 0.0009689734  |
| clipfrac           | 0.008828125   |
| explained_variance | 0.533         |
| fps                | 1062          |
| n_updates          | 511           |
| policy_entropy     | 0.05527675    |
| policy_loss        | -0.0008715622 |
| serial_timesteps   | 1635200       |
| time_elapsed       | 1.58e+03      |
| total_timesteps    | 1635200       |
| value_loss         | 0.001273238   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0005907143  |
| clipfrac           | 0.005390625   |
| explained_variance | 0.625         |
| fps                | 1055          |
| n_updates          | 512           |
| policy_entropy     | 0.046495974   |
| policy_loss        | -0.0013581965 |
| serial_timesteps   | 1638400       |
| time_elapsed       | 1.59e+03      |
| total_timesteps    | 1638400       |
| value_loss         | 0.0010517903  |
-------------------------

--------------------------------------
| approxkl           | 0.00022872633 |
| clipfrac           | 0.00234375    |
| explained_variance | 0.676         |
| fps                | 1083          |
| n_updates          | 528           |
| policy_entropy     | 0.05047471    |
| policy_loss        | -0.0003245288 |
| serial_timesteps   | 1689600       |
| time_elapsed       | 1.64e+03      |
| total_timesteps    | 1689600       |
| value_loss         | 0.00089299027 |
--------------------------------------
---------------------------------------
| approxkl           | 0.0008422207   |
| clipfrac           | 0.010078125    |
| explained_variance | 0.643          |
| fps                | 1049           |
| n_updates          | 529            |
| policy_entropy     | 0.05877529     |
| policy_loss        | -0.00046282355 |
| serial_timesteps   | 1692800        |
| time_elapsed       | 1.64e+03       |
| total_timesteps    | 1692800        |
| value_loss         | 0.001000585    |
-------------

---------------------------------------
| approxkl           | 0.0008682282   |
| clipfrac           | 0.008984375    |
| explained_variance | 0.418          |
| fps                | 1070           |
| n_updates          | 545            |
| policy_entropy     | 0.059624102    |
| policy_loss        | -0.00077035045 |
| serial_timesteps   | 1744000        |
| time_elapsed       | 1.69e+03       |
| total_timesteps    | 1744000        |
| value_loss         | 0.0015949454   |
---------------------------------------
---------------------------------------
| approxkl           | 0.0005981249   |
| clipfrac           | 0.00796875     |
| explained_variance | 0.488          |
| fps                | 1101           |
| n_updates          | 546            |
| policy_entropy     | 0.049670838    |
| policy_loss        | -0.00063851965 |
| serial_timesteps   | 1747200        |
| time_elapsed       | 1.69e+03       |
| total_timesteps    | 1747200        |
| value_loss         | 0.0014995891   |


--------------------------------------
| approxkl           | 0.0007417203  |
| clipfrac           | 0.00578125    |
| explained_variance | 0.636         |
| fps                | 1118          |
| n_updates          | 562           |
| policy_entropy     | 0.04034666    |
| policy_loss        | -0.0012294311 |
| serial_timesteps   | 1798400       |
| time_elapsed       | 1.74e+03      |
| total_timesteps    | 1798400       |
| value_loss         | 0.0009766934  |
--------------------------------------
---------------------------------------
| approxkl           | 0.00027104316  |
| clipfrac           | 0.0031249998   |
| explained_variance | 0.651          |
| fps                | 1160           |
| n_updates          | 563            |
| policy_entropy     | 0.04446926     |
| policy_loss        | -0.00055414496 |
| serial_timesteps   | 1801600        |
| time_elapsed       | 1.74e+03       |
| total_timesteps    | 1801600        |
| value_loss         | 0.0008363803   |
-------------

--------------------------------------
| approxkl           | 0.0008013167  |
| clipfrac           | 0.00703125    |
| explained_variance | 0.583         |
| fps                | 1056          |
| n_updates          | 579           |
| policy_entropy     | 0.04892271    |
| policy_loss        | -0.0010153061 |
| serial_timesteps   | 1852800       |
| time_elapsed       | 1.79e+03      |
| total_timesteps    | 1852800       |
| value_loss         | 0.001225903   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00075890915 |
| clipfrac           | 0.00546875    |
| explained_variance | 0.592         |
| fps                | 1100          |
| n_updates          | 580           |
| policy_entropy     | 0.04499517    |
| policy_loss        | -0.0009461647 |
| serial_timesteps   | 1856000       |
| time_elapsed       | 1.79e+03      |
| total_timesteps    | 1856000       |
| value_loss         | 0.0010675199  |
-------------------------

--------------------------------------
| approxkl           | 0.00058095023 |
| clipfrac           | 0.004765625   |
| explained_variance | 0.634         |
| fps                | 1159          |
| n_updates          | 596           |
| policy_entropy     | 0.039955992   |
| policy_loss        | -0.0016967829 |
| serial_timesteps   | 1907200       |
| time_elapsed       | 1.84e+03      |
| total_timesteps    | 1907200       |
| value_loss         | 0.00096490816 |
--------------------------------------
---------------------------------------
| approxkl           | 0.00045379525  |
| clipfrac           | 0.0046093753   |
| explained_variance | 0.706          |
| fps                | 1108           |
| n_updates          | 597            |
| policy_entropy     | 0.040021658    |
| policy_loss        | -0.00070710306 |
| serial_timesteps   | 1910400        |
| time_elapsed       | 1.84e+03       |
| total_timesteps    | 1910400        |
| value_loss         | 0.0007799988   |
-------------

--------------------------------------
| approxkl           | 0.0015109098  |
| clipfrac           | 0.014687501   |
| explained_variance | 0.699         |
| fps                | 1126          |
| n_updates          | 613           |
| policy_entropy     | 0.046285477   |
| policy_loss        | -0.0011103764 |
| serial_timesteps   | 1961600       |
| time_elapsed       | 1.88e+03      |
| total_timesteps    | 1961600       |
| value_loss         | 0.000822755   |
--------------------------------------
---------------------------------------
| approxkl           | 0.000342702    |
| clipfrac           | 0.003203125    |
| explained_variance | 0.696          |
| fps                | 1117           |
| n_updates          | 614            |
| policy_entropy     | 0.03744776     |
| policy_loss        | -0.00050992053 |
| serial_timesteps   | 1964800        |
| time_elapsed       | 1.89e+03       |
| total_timesteps    | 1964800        |
| value_loss         | 0.00069987174  |
-------------

--------------------------------------
| approxkl           | 0.0011981761  |
| clipfrac           | 0.013046875   |
| explained_variance | 0.716         |
| fps                | 1080          |
| n_updates          | 630           |
| policy_entropy     | 0.061303668   |
| policy_loss        | -0.0007631774 |
| serial_timesteps   | 2016000       |
| time_elapsed       | 1.93e+03      |
| total_timesteps    | 2016000       |
| value_loss         | 0.0006826333  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00040654588 |
| clipfrac           | 0.0046093753  |
| explained_variance | 0.711         |
| fps                | 1097          |
| n_updates          | 631           |
| policy_entropy     | 0.05454718    |
| policy_loss        | -0.0010845892 |
| serial_timesteps   | 2019200       |
| time_elapsed       | 1.94e+03      |
| total_timesteps    | 2019200       |
| value_loss         | 0.0007304072  |
-------------------------

---------------------------------------
| approxkl           | 0.0004176795   |
| clipfrac           | 0.0051562497   |
| explained_variance | 0.675          |
| fps                | 1146           |
| n_updates          | 647            |
| policy_entropy     | 0.05766089     |
| policy_loss        | -0.00060785445 |
| serial_timesteps   | 2070400        |
| time_elapsed       | 1.98e+03       |
| total_timesteps    | 2070400        |
| value_loss         | 0.00094464846  |
---------------------------------------
--------------------------------------
| approxkl           | 0.0019139079  |
| clipfrac           | 0.014609376   |
| explained_variance | 0.712         |
| fps                | 1140          |
| n_updates          | 648           |
| policy_entropy     | 0.05645137    |
| policy_loss        | -0.0013219933 |
| serial_timesteps   | 2073600       |
| time_elapsed       | 1.99e+03      |
| total_timesteps    | 2073600       |
| value_loss         | 0.00309285    |
------------

---------------------------------------
| approxkl           | 0.0009751474   |
| clipfrac           | 0.00765625     |
| explained_variance | 0.478          |
| fps                | 1143           |
| n_updates          | 664            |
| policy_entropy     | 0.052873358    |
| policy_loss        | -0.00089420634 |
| serial_timesteps   | 2124800        |
| time_elapsed       | 2.03e+03       |
| total_timesteps    | 2124800        |
| value_loss         | 0.0044333264   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0019095936  |
| clipfrac           | 0.0146875     |
| explained_variance | 0.71          |
| fps                | 1130          |
| n_updates          | 665           |
| policy_entropy     | 0.05709464    |
| policy_loss        | -0.0010542375 |
| serial_timesteps   | 2128000       |
| time_elapsed       | 2.04e+03      |
| total_timesteps    | 2128000       |
| value_loss         | 0.0007204544  |
------------

-------------------------------------
| approxkl           | 0.0020795693 |
| clipfrac           | 0.015        |
| explained_variance | 0.681        |
| fps                | 1051         |
| n_updates          | 681          |
| policy_entropy     | 0.049123965  |
| policy_loss        | -0.002600487 |
| serial_timesteps   | 2179200      |
| time_elapsed       | 2.08e+03     |
| total_timesteps    | 2179200      |
| value_loss         | 0.0030748064 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0037327278  |
| clipfrac           | 0.022421874   |
| explained_variance | 0.665         |
| fps                | 1070          |
| n_updates          | 682           |
| policy_entropy     | 0.060291912   |
| policy_loss        | -0.0022201582 |
| serial_timesteps   | 2182400       |
| time_elapsed       | 2.09e+03      |
| total_timesteps    | 2182400       |
| value_loss         | 0.0010390796  |
--------------------------------------

--------------------------------------
| approxkl           | 0.00040024967 |
| clipfrac           | 0.004921875   |
| explained_variance | 0.703         |
| fps                | 1091          |
| n_updates          | 698           |
| policy_entropy     | 0.05123552    |
| policy_loss        | -0.0006664987 |
| serial_timesteps   | 2233600       |
| time_elapsed       | 2.13e+03      |
| total_timesteps    | 2233600       |
| value_loss         | 0.0007527949  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0011436054  |
| clipfrac           | 0.01109375    |
| explained_variance | 0.7           |
| fps                | 1077          |
| n_updates          | 699           |
| policy_entropy     | 0.052057862   |
| policy_loss        | -0.0005863714 |
| serial_timesteps   | 2236800       |
| time_elapsed       | 2.14e+03      |
| total_timesteps    | 2236800       |
| value_loss         | 0.00076010963 |
-------------------------

-------------------------------------
| approxkl           | 0.0027490836 |
| clipfrac           | 0.022890626  |
| explained_variance | 0.746        |
| fps                | 970          |
| n_updates          | 715          |
| policy_entropy     | 0.058469128  |
| policy_loss        | -0.000613827 |
| serial_timesteps   | 2288000      |
| time_elapsed       | 2.19e+03     |
| total_timesteps    | 2288000      |
| value_loss         | 0.0006665987 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0004513936  |
| clipfrac           | 0.00515625    |
| explained_variance | 0.714         |
| fps                | 875           |
| n_updates          | 716           |
| policy_entropy     | 0.04861346    |
| policy_loss        | -0.0010084657 |
| serial_timesteps   | 2291200       |
| time_elapsed       | 2.19e+03      |
| total_timesteps    | 2291200       |
| value_loss         | 0.0007104103  |
--------------------------------------

--------------------------------------
| approxkl           | 0.00069920986 |
| clipfrac           | 0.00734375    |
| explained_variance | 0.685         |
| fps                | 1095          |
| n_updates          | 732           |
| policy_entropy     | 0.057181455   |
| policy_loss        | -0.0011949602 |
| serial_timesteps   | 2342400       |
| time_elapsed       | 2.24e+03      |
| total_timesteps    | 2342400       |
| value_loss         | 0.0009501757  |
--------------------------------------
--------------------------------------
| approxkl           | 0.001012913   |
| clipfrac           | 0.010937501   |
| explained_variance | 0.74          |
| fps                | 1137          |
| n_updates          | 733           |
| policy_entropy     | 0.064861245   |
| policy_loss        | -0.0011668927 |
| serial_timesteps   | 2345600       |
| time_elapsed       | 2.24e+03      |
| total_timesteps    | 2345600       |
| value_loss         | 0.0008009955  |
-------------------------

--------------------------------------
| approxkl           | 0.001087876   |
| clipfrac           | 0.010078125   |
| explained_variance | 0.696         |
| fps                | 1013          |
| n_updates          | 749           |
| policy_entropy     | 0.045968086   |
| policy_loss        | -0.0016437729 |
| serial_timesteps   | 2396800       |
| time_elapsed       | 2.29e+03      |
| total_timesteps    | 2396800       |
| value_loss         | 0.0007955048  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0009608065 |
| clipfrac           | 0.009375     |
| explained_variance | 0.823        |
| fps                | 1013         |
| n_updates          | 750          |
| policy_entropy     | 0.047651917  |
| policy_loss        | -0.002198558 |
| serial_timesteps   | 2400000      |
| time_elapsed       | 2.3e+03      |
| total_timesteps    | 2400000      |
| value_loss         | 0.0014608447 |
-------------------------------------

--------------------------------------
| approxkl           | 0.0010816569  |
| clipfrac           | 0.008593749   |
| explained_variance | 0.658         |
| fps                | 1076          |
| n_updates          | 766           |
| policy_entropy     | 0.051999874   |
| policy_loss        | -0.0014840715 |
| serial_timesteps   | 2451200       |
| time_elapsed       | 2.34e+03      |
| total_timesteps    | 2451200       |
| value_loss         | 0.0031879633  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00074843434 |
| clipfrac           | 0.0071875     |
| explained_variance | 0.713         |
| fps                | 1065          |
| n_updates          | 767           |
| policy_entropy     | 0.05833125    |
| policy_loss        | -0.0013221886 |
| serial_timesteps   | 2454400       |
| time_elapsed       | 2.35e+03      |
| total_timesteps    | 2454400       |
| value_loss         | 0.00081878854 |
-------------------------

--------------------------------------
| approxkl           | 0.002134691   |
| clipfrac           | 0.00890625    |
| explained_variance | 0.714         |
| fps                | 1113          |
| n_updates          | 783           |
| policy_entropy     | 0.07004137    |
| policy_loss        | -0.0014124246 |
| serial_timesteps   | 2505600       |
| time_elapsed       | 2.4e+03       |
| total_timesteps    | 2505600       |
| value_loss         | 0.0008403833  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0022745258  |
| clipfrac           | 0.014609376   |
| explained_variance | 0.686         |
| fps                | 1089          |
| n_updates          | 784           |
| policy_entropy     | 0.06093059    |
| policy_loss        | -0.0015712811 |
| serial_timesteps   | 2508800       |
| time_elapsed       | 2.4e+03       |
| total_timesteps    | 2508800       |
| value_loss         | 0.0008343517  |
-------------------------

---------------------------------------
| approxkl           | 0.0019662683   |
| clipfrac           | 0.018828124    |
| explained_variance | 0.695          |
| fps                | 1115           |
| n_updates          | 800            |
| policy_entropy     | 0.05218098     |
| policy_loss        | -0.00092336937 |
| serial_timesteps   | 2560000        |
| time_elapsed       | 2.45e+03       |
| total_timesteps    | 2560000        |
| value_loss         | 0.0006961066   |
---------------------------------------
---------------------------------------
| approxkl           | 0.00088170473  |
| clipfrac           | 0.006953125    |
| explained_variance | 0.675          |
| fps                | 1109           |
| n_updates          | 801            |
| policy_entropy     | 0.058206946    |
| policy_loss        | -0.00097206666 |
| serial_timesteps   | 2563200        |
| time_elapsed       | 2.45e+03       |
| total_timesteps    | 2563200        |
| value_loss         | 0.0008858707   |


--------------------------------------
| approxkl           | 0.0010535875  |
| clipfrac           | 0.008046875   |
| explained_variance | 0.745         |
| fps                | 1097          |
| n_updates          | 817           |
| policy_entropy     | 0.053433914   |
| policy_loss        | -0.0014916953 |
| serial_timesteps   | 2614400       |
| time_elapsed       | 2.5e+03       |
| total_timesteps    | 2614400       |
| value_loss         | 0.00070346735 |
--------------------------------------
--------------------------------------
| approxkl           | 0.0008218074  |
| clipfrac           | 0.010234375   |
| explained_variance | 0.742         |
| fps                | 1124          |
| n_updates          | 818           |
| policy_entropy     | 0.049786422   |
| policy_loss        | -0.0008475251 |
| serial_timesteps   | 2617600       |
| time_elapsed       | 2.5e+03       |
| total_timesteps    | 2617600       |
| value_loss         | 0.0006472012  |
-------------------------

--------------------------------------
| approxkl           | 0.0012171995  |
| clipfrac           | 0.013359375   |
| explained_variance | 0.728         |
| fps                | 1126          |
| n_updates          | 834           |
| policy_entropy     | 0.05263438    |
| policy_loss        | -0.0010968875 |
| serial_timesteps   | 2668800       |
| time_elapsed       | 2.55e+03      |
| total_timesteps    | 2668800       |
| value_loss         | 0.0006884341  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0005551069  |
| clipfrac           | 0.00734375    |
| explained_variance | 0.741         |
| fps                | 1119          |
| n_updates          | 835           |
| policy_entropy     | 0.051485915   |
| policy_loss        | -0.0011508139 |
| serial_timesteps   | 2672000       |
| time_elapsed       | 2.55e+03      |
| total_timesteps    | 2672000       |
| value_loss         | 0.0005992368  |
-------------------------

--------------------------------------
| approxkl           | 0.0009973992  |
| clipfrac           | 0.012499999   |
| explained_variance | 0.746         |
| fps                | 1091          |
| n_updates          | 851           |
| policy_entropy     | 0.062287416   |
| policy_loss        | -0.001593576  |
| serial_timesteps   | 2723200       |
| time_elapsed       | 2.59e+03      |
| total_timesteps    | 2723200       |
| value_loss         | 0.00062663667 |
--------------------------------------
---------------------------------------
| approxkl           | 0.00081638654  |
| clipfrac           | 0.006953125    |
| explained_variance | 0.761          |
| fps                | 1074           |
| n_updates          | 852            |
| policy_entropy     | 0.05100353     |
| policy_loss        | -0.00097376783 |
| serial_timesteps   | 2726400        |
| time_elapsed       | 2.6e+03        |
| total_timesteps    | 2726400        |
| value_loss         | 0.00054373586  |
-------------

--------------------------------------
| approxkl           | 0.00055221305 |
| clipfrac           | 0.005546875   |
| explained_variance | 0.539         |
| fps                | 1088          |
| n_updates          | 868           |
| policy_entropy     | 0.05943307    |
| policy_loss        | -0.0012047344 |
| serial_timesteps   | 2777600       |
| time_elapsed       | 2.65e+03      |
| total_timesteps    | 2777600       |
| value_loss         | 0.0011194804  |
--------------------------------------
---------------------------------------
| approxkl           | 0.00096729636  |
| clipfrac           | 0.01015625     |
| explained_variance | 0.562          |
| fps                | 1081           |
| n_updates          | 869            |
| policy_entropy     | 0.05268418     |
| policy_loss        | -0.00083884154 |
| serial_timesteps   | 2780800        |
| time_elapsed       | 2.65e+03       |
| total_timesteps    | 2780800        |
| value_loss         | 0.00094497856  |
-------------

--------------------------------------
| approxkl           | 0.0007844111  |
| clipfrac           | 0.0078125     |
| explained_variance | 0.691         |
| fps                | 1021          |
| n_updates          | 885           |
| policy_entropy     | 0.06166567    |
| policy_loss        | -0.0017420427 |
| serial_timesteps   | 2832000       |
| time_elapsed       | 2.7e+03       |
| total_timesteps    | 2832000       |
| value_loss         | 0.00087004836 |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012887714  |
| clipfrac           | 0.013359375   |
| explained_variance | 0.66          |
| fps                | 1024          |
| n_updates          | 886           |
| policy_entropy     | 0.06055933    |
| policy_loss        | -0.0009338413 |
| serial_timesteps   | 2835200       |
| time_elapsed       | 2.7e+03       |
| total_timesteps    | 2835200       |
| value_loss         | 0.0008069459  |
-------------------------

--------------------------------------
| approxkl           | 0.00069244014 |
| clipfrac           | 0.007265625   |
| explained_variance | 0.71          |
| fps                | 1106          |
| n_updates          | 902           |
| policy_entropy     | 0.0493655     |
| policy_loss        | -0.0004317072 |
| serial_timesteps   | 2886400       |
| time_elapsed       | 2.75e+03      |
| total_timesteps    | 2886400       |
| value_loss         | 0.000756027   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0009268522   |
| clipfrac           | 0.0103124995   |
| explained_variance | 0.699          |
| fps                | 1087           |
| n_updates          | 903            |
| policy_entropy     | 0.05215566     |
| policy_loss        | -0.00063523796 |
| serial_timesteps   | 2889600        |
| time_elapsed       | 2.75e+03       |
| total_timesteps    | 2889600        |
| value_loss         | 0.0006572305   |
-------------

--------------------------------------
| approxkl           | 0.00048155256 |
| clipfrac           | 0.005859375   |
| explained_variance | 0.686         |
| fps                | 1022          |
| n_updates          | 919           |
| policy_entropy     | 0.053951405   |
| policy_loss        | -0.0011265981 |
| serial_timesteps   | 2940800       |
| time_elapsed       | 2.8e+03       |
| total_timesteps    | 2940800       |
| value_loss         | 0.0008134766  |
--------------------------------------
--------------------------------------
| approxkl           | 0.001294062   |
| clipfrac           | 0.0115625     |
| explained_variance | 0.709         |
| fps                | 1025          |
| n_updates          | 920           |
| policy_entropy     | 0.049497142   |
| policy_loss        | -0.0005558031 |
| serial_timesteps   | 2944000       |
| time_elapsed       | 2.8e+03       |
| total_timesteps    | 2944000       |
| value_loss         | 0.0007358121  |
-------------------------

--------------------------------------
| approxkl           | 0.0010349185  |
| clipfrac           | 0.0083593745  |
| explained_variance | 0.691         |
| fps                | 1192          |
| n_updates          | 936           |
| policy_entropy     | 0.051560406   |
| policy_loss        | -0.0003785757 |
| serial_timesteps   | 2995200       |
| time_elapsed       | 2.85e+03      |
| total_timesteps    | 2995200       |
| value_loss         | 0.0007891903  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0018864218  |
| clipfrac           | 0.014453125   |
| explained_variance | 0.682         |
| fps                | 1158          |
| n_updates          | 937           |
| policy_entropy     | 0.058517896   |
| policy_loss        | -0.0019384965 |
| serial_timesteps   | 2998400       |
| time_elapsed       | 2.86e+03      |
| total_timesteps    | 2998400       |
| value_loss         | 0.0008085102  |
-------------------------

--------------------------------------
| approxkl           | 0.0042614564  |
| clipfrac           | 0.019609375   |
| explained_variance | 0.662         |
| fps                | 1057          |
| n_updates          | 953           |
| policy_entropy     | 0.05441738    |
| policy_loss        | -0.0031322795 |
| serial_timesteps   | 3049600       |
| time_elapsed       | 2.9e+03       |
| total_timesteps    | 3049600       |
| value_loss         | 0.016684357   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0027654083  |
| clipfrac           | 0.0159375     |
| explained_variance | 0.568         |
| fps                | 976           |
| n_updates          | 954           |
| policy_entropy     | 0.05254558    |
| policy_loss        | -0.0010109695 |
| serial_timesteps   | 3052800       |
| time_elapsed       | 2.9e+03       |
| total_timesteps    | 3052800       |
| value_loss         | 0.0053289277  |
-------------------------

--------------------------------------
| approxkl           | 0.00055150094 |
| clipfrac           | 0.006953125   |
| explained_variance | 0.616         |
| fps                | 1188          |
| n_updates          | 970           |
| policy_entropy     | 0.050969083   |
| policy_loss        | -0.001015757  |
| serial_timesteps   | 3104000       |
| time_elapsed       | 2.95e+03      |
| total_timesteps    | 3104000       |
| value_loss         | 0.0010317278  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0007108411   |
| clipfrac           | 0.00796875     |
| explained_variance | 0.64           |
| fps                | 1163           |
| n_updates          | 971            |
| policy_entropy     | 0.05066771     |
| policy_loss        | -0.00075097336 |
| serial_timesteps   | 3107200        |
| time_elapsed       | 2.95e+03       |
| total_timesteps    | 3107200        |
| value_loss         | 0.0009094328   |
-------------

--------------------------------------
| approxkl           | 0.0017841221  |
| clipfrac           | 0.016328126   |
| explained_variance | 0.569         |
| fps                | 1169          |
| n_updates          | 987           |
| policy_entropy     | 0.082675904   |
| policy_loss        | -0.0015763127 |
| serial_timesteps   | 3158400       |
| time_elapsed       | 3e+03         |
| total_timesteps    | 3158400       |
| value_loss         | 0.0011084691  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012623866  |
| clipfrac           | 0.011875      |
| explained_variance | 0.681         |
| fps                | 1009          |
| n_updates          | 988           |
| policy_entropy     | 0.07613819    |
| policy_loss        | -0.0011117242 |
| serial_timesteps   | 3161600       |
| time_elapsed       | 3e+03         |
| total_timesteps    | 3161600       |
| value_loss         | 0.0008629655  |
-------------------------

---------------------------------------
| approxkl           | 0.0024806692   |
| clipfrac           | 0.022109374    |
| explained_variance | 0.692          |
| fps                | 1163           |
| n_updates          | 1004           |
| policy_entropy     | 0.07105699     |
| policy_loss        | -0.00085086084 |
| serial_timesteps   | 3212800        |
| time_elapsed       | 3.04e+03       |
| total_timesteps    | 3212800        |
| value_loss         | 0.0008386669   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0009586685  |
| clipfrac           | 0.01203125    |
| explained_variance | 0.703         |
| fps                | 1188          |
| n_updates          | 1005          |
| policy_entropy     | 0.07472235    |
| policy_loss        | -0.0011116299 |
| serial_timesteps   | 3216000       |
| time_elapsed       | 3.05e+03      |
| total_timesteps    | 3216000       |
| value_loss         | 0.0007435868  |
------------

--------------------------------------
| approxkl           | 0.0016653345  |
| clipfrac           | 0.010624999   |
| explained_variance | 0.673         |
| fps                | 1020          |
| n_updates          | 1021          |
| policy_entropy     | 0.05766847    |
| policy_loss        | -0.0018605489 |
| serial_timesteps   | 3267200       |
| time_elapsed       | 3.09e+03      |
| total_timesteps    | 3267200       |
| value_loss         | 0.00082273135 |
--------------------------------------
--------------------------------------
| approxkl           | 0.0017152568  |
| clipfrac           | 0.012499999   |
| explained_variance | 0.702         |
| fps                | 1045          |
| n_updates          | 1022          |
| policy_entropy     | 0.06719619    |
| policy_loss        | -0.0010635478 |
| serial_timesteps   | 3270400       |
| time_elapsed       | 3.1e+03       |
| total_timesteps    | 3270400       |
| value_loss         | 0.0008486265  |
-------------------------

--------------------------------------
| approxkl           | 0.00089152076 |
| clipfrac           | 0.008984375   |
| explained_variance | 0.694         |
| fps                | 980           |
| n_updates          | 1038          |
| policy_entropy     | 0.05457755    |
| policy_loss        | -0.0013619992 |
| serial_timesteps   | 3321600       |
| time_elapsed       | 3.15e+03      |
| total_timesteps    | 3321600       |
| value_loss         | 0.00084436714 |
--------------------------------------
--------------------------------------
| approxkl           | 0.0014630947  |
| clipfrac           | 0.007421875   |
| explained_variance | 0.683         |
| fps                | 1033          |
| n_updates          | 1039          |
| policy_entropy     | 0.055998657   |
| policy_loss        | -0.0012868175 |
| serial_timesteps   | 3324800       |
| time_elapsed       | 3.15e+03      |
| total_timesteps    | 3324800       |
| value_loss         | 0.0008938117  |
-------------------------

--------------------------------------
| approxkl           | 0.0008797256  |
| clipfrac           | 0.007890625   |
| explained_variance | -0.573        |
| fps                | 1012          |
| n_updates          | 1055          |
| policy_entropy     | 0.061850876   |
| policy_loss        | -0.0006559768 |
| serial_timesteps   | 3376000       |
| time_elapsed       | 3.21e+03      |
| total_timesteps    | 3376000       |
| value_loss         | 0.004251131   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009679025  |
| clipfrac           | 0.010390625   |
| explained_variance | 0.0557        |
| fps                | 978           |
| n_updates          | 1056          |
| policy_entropy     | 0.066957176   |
| policy_loss        | -0.0025820322 |
| serial_timesteps   | 3379200       |
| time_elapsed       | 3.21e+03      |
| total_timesteps    | 3379200       |
| value_loss         | 0.006549172   |
-------------------------

--------------------------------------
| approxkl           | 0.0005126422  |
| clipfrac           | 0.005         |
| explained_variance | 0.624         |
| fps                | 980           |
| n_updates          | 1072          |
| policy_entropy     | 0.057028923   |
| policy_loss        | -0.0012861374 |
| serial_timesteps   | 3430400       |
| time_elapsed       | 3.27e+03      |
| total_timesteps    | 3430400       |
| value_loss         | 0.0008901289  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0005866099   |
| clipfrac           | 0.007265625    |
| explained_variance | 0.679          |
| fps                | 978            |
| n_updates          | 1073           |
| policy_entropy     | 0.061909787    |
| policy_loss        | -0.00096835877 |
| serial_timesteps   | 3433600        |
| time_elapsed       | 3.27e+03       |
| total_timesteps    | 3433600        |
| value_loss         | 0.00094266434  |
-------------

--------------------------------------
| approxkl           | 0.0014753563  |
| clipfrac           | 0.016796876   |
| explained_variance | -2.42         |
| fps                | 1038          |
| n_updates          | 1089          |
| policy_entropy     | 0.08003064    |
| policy_loss        | -0.0016127873 |
| serial_timesteps   | 3484800       |
| time_elapsed       | 3.32e+03      |
| total_timesteps    | 3484800       |
| value_loss         | 0.023323057   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0013620611  |
| clipfrac           | 0.013828125   |
| explained_variance | -0.913        |
| fps                | 993           |
| n_updates          | 1090          |
| policy_entropy     | 0.075088225   |
| policy_loss        | -0.0011324319 |
| serial_timesteps   | 3488000       |
| time_elapsed       | 3.33e+03      |
| total_timesteps    | 3488000       |
| value_loss         | 0.0146261025  |
-------------------------

--------------------------------------
| approxkl           | 0.0012680306  |
| clipfrac           | 0.015390625   |
| explained_variance | 0.568         |
| fps                | 1010          |
| n_updates          | 1106          |
| policy_entropy     | 0.068642884   |
| policy_loss        | -0.0007431765 |
| serial_timesteps   | 3539200       |
| time_elapsed       | 3.38e+03      |
| total_timesteps    | 3539200       |
| value_loss         | 0.0011036623  |
--------------------------------------
--------------------------------------
| approxkl           | 0.001898294   |
| clipfrac           | 0.009375      |
| explained_variance | 0.348         |
| fps                | 1039          |
| n_updates          | 1107          |
| policy_entropy     | 0.071712725   |
| policy_loss        | -0.0018499452 |
| serial_timesteps   | 3542400       |
| time_elapsed       | 3.38e+03      |
| total_timesteps    | 3542400       |
| value_loss         | 0.0019652825  |
-------------------------

--------------------------------------
| approxkl           | 0.001340847   |
| clipfrac           | 0.01171875    |
| explained_variance | 0.128         |
| fps                | 878           |
| n_updates          | 1123          |
| policy_entropy     | 0.058186837   |
| policy_loss        | -0.0011624098 |
| serial_timesteps   | 3593600       |
| time_elapsed       | 3.43e+03      |
| total_timesteps    | 3593600       |
| value_loss         | 0.0027105531  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012528494  |
| clipfrac           | 0.011796875   |
| explained_variance | 0.245         |
| fps                | 936           |
| n_updates          | 1124          |
| policy_entropy     | 0.05699741    |
| policy_loss        | -0.0009067522 |
| serial_timesteps   | 3596800       |
| time_elapsed       | 3.44e+03      |
| total_timesteps    | 3596800       |
| value_loss         | 0.0024484913  |
-------------------------

--------------------------------------
| approxkl           | 0.000852388   |
| clipfrac           | 0.00796875    |
| explained_variance | 0.506         |
| fps                | 897           |
| n_updates          | 1140          |
| policy_entropy     | 0.05205889    |
| policy_loss        | -0.0010207276 |
| serial_timesteps   | 3648000       |
| time_elapsed       | 3.49e+03      |
| total_timesteps    | 3648000       |
| value_loss         | 0.0012632834  |
--------------------------------------
---------------------------------------
| approxkl           | 0.00073458906  |
| clipfrac           | 0.009296875    |
| explained_variance | 0.281          |
| fps                | 951            |
| n_updates          | 1141           |
| policy_entropy     | 0.058638684    |
| policy_loss        | -0.00068872306 |
| serial_timesteps   | 3651200        |
| time_elapsed       | 3.49e+03       |
| total_timesteps    | 3651200        |
| value_loss         | 0.0016772556   |
-------------

-------------------------------------
| approxkl           | 0.0037744907 |
| clipfrac           | 0.021484375  |
| explained_variance | 0.507        |
| fps                | 1151         |
| n_updates          | 1157         |
| policy_entropy     | 0.06668344   |
| policy_loss        | -0.002759243 |
| serial_timesteps   | 3702400      |
| time_elapsed       | 3.54e+03     |
| total_timesteps    | 3702400      |
| value_loss         | 0.0057196785 |
-------------------------------------
---------------------------------------
| approxkl           | 0.00070771534  |
| clipfrac           | 0.007578125    |
| explained_variance | 0.562          |
| fps                | 1123           |
| n_updates          | 1158           |
| policy_entropy     | 0.064709485    |
| policy_loss        | -0.00094387645 |
| serial_timesteps   | 3705600        |
| time_elapsed       | 3.55e+03       |
| total_timesteps    | 3705600        |
| value_loss         | 0.0013123245   |
--------------------------

--------------------------------------
| approxkl           | 0.0013955913  |
| clipfrac           | 0.0131250005  |
| explained_variance | 0.675         |
| fps                | 1080          |
| n_updates          | 1174          |
| policy_entropy     | 0.06687541    |
| policy_loss        | -0.0011521295 |
| serial_timesteps   | 3756800       |
| time_elapsed       | 3.6e+03       |
| total_timesteps    | 3756800       |
| value_loss         | 0.002759127   |
--------------------------------------
--------------------------------------
| approxkl           | 0.001108456   |
| clipfrac           | 0.009765625   |
| explained_variance | 0.464         |
| fps                | 1042          |
| n_updates          | 1175          |
| policy_entropy     | 0.07530454    |
| policy_loss        | -0.0026821904 |
| serial_timesteps   | 3760000       |
| time_elapsed       | 3.6e+03       |
| total_timesteps    | 3760000       |
| value_loss         | 0.014975957   |
-------------------------

---------------------------------------
| approxkl           | 0.00065199763  |
| clipfrac           | 0.007265625    |
| explained_variance | 0.477          |
| fps                | 1101           |
| n_updates          | 1191           |
| policy_entropy     | 0.06826554     |
| policy_loss        | -0.00039955467 |
| serial_timesteps   | 3811200        |
| time_elapsed       | 3.65e+03       |
| total_timesteps    | 3811200        |
| value_loss         | 0.0012314118   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0028063164  |
| clipfrac           | 0.015859375   |
| explained_variance | 0.651         |
| fps                | 1134          |
| n_updates          | 1192          |
| policy_entropy     | 0.06225381    |
| policy_loss        | -0.001285739  |
| serial_timesteps   | 3814400       |
| time_elapsed       | 3.65e+03      |
| total_timesteps    | 3814400       |
| value_loss         | 0.00087390555 |
------------

--------------------------------------
| approxkl           | 0.0013844369  |
| clipfrac           | 0.0142187495  |
| explained_variance | 0.149         |
| fps                | 1127          |
| n_updates          | 1208          |
| policy_entropy     | 0.052851472   |
| policy_loss        | -0.0011864137 |
| serial_timesteps   | 3865600       |
| time_elapsed       | 3.7e+03       |
| total_timesteps    | 3865600       |
| value_loss         | 0.0066662417  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0005385108  |
| clipfrac           | 0.0063281246  |
| explained_variance | 0.393         |
| fps                | 1084          |
| n_updates          | 1209          |
| policy_entropy     | 0.05585747    |
| policy_loss        | -0.0014668524 |
| serial_timesteps   | 3868800       |
| time_elapsed       | 3.7e+03       |
| total_timesteps    | 3868800       |
| value_loss         | 0.0013976012  |
-------------------------

--------------------------------------
| approxkl           | 0.0010049858  |
| clipfrac           | 0.0078125     |
| explained_variance | 0.682         |
| fps                | 1113          |
| n_updates          | 1225          |
| policy_entropy     | 0.042395864   |
| policy_loss        | -0.000880333  |
| serial_timesteps   | 3920000       |
| time_elapsed       | 3.75e+03      |
| total_timesteps    | 3920000       |
| value_loss         | 0.00082444306 |
--------------------------------------
---------------------------------------
| approxkl           | 0.00037952865  |
| clipfrac           | 0.004375       |
| explained_variance | 0.639          |
| fps                | 1149           |
| n_updates          | 1226           |
| policy_entropy     | 0.047207054    |
| policy_loss        | -0.00074294244 |
| serial_timesteps   | 3923200        |
| time_elapsed       | 3.75e+03       |
| total_timesteps    | 3923200        |
| value_loss         | 0.0009006989   |
-------------

--------------------------------------
| approxkl           | 0.0005652857  |
| clipfrac           | 0.007109375   |
| explained_variance | 0.71          |
| fps                | 1079          |
| n_updates          | 1242          |
| policy_entropy     | 0.046845876   |
| policy_loss        | -0.0005771556 |
| serial_timesteps   | 3974400       |
| time_elapsed       | 3.79e+03      |
| total_timesteps    | 3974400       |
| value_loss         | 0.0007967998  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0011490617 |
| clipfrac           | 0.0111718755 |
| explained_variance | 0.642        |
| fps                | 1085         |
| n_updates          | 1243         |
| policy_entropy     | 0.048395403  |
| policy_loss        | -0.001669965 |
| serial_timesteps   | 3977600      |
| time_elapsed       | 3.8e+03      |
| total_timesteps    | 3977600      |
| value_loss         | 0.0028323412 |
-------------------------------------

--------------------------------------
| approxkl           | 0.00054101326 |
| clipfrac           | 0.0065625003  |
| explained_variance | 0.466         |
| fps                | 1137          |
| n_updates          | 1259          |
| policy_entropy     | 0.062010907   |
| policy_loss        | -0.0008106321 |
| serial_timesteps   | 4028800       |
| time_elapsed       | 3.85e+03      |
| total_timesteps    | 4028800       |
| value_loss         | 0.0014594856  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0012568163  |
| clipfrac           | 0.011640625   |
| explained_variance | 0.635         |
| fps                | 1099          |
| n_updates          | 1260          |
| policy_entropy     | 0.05803082    |
| policy_loss        | -0.0019752427 |
| serial_timesteps   | 4032000       |
| time_elapsed       | 3.85e+03      |
| total_timesteps    | 4032000       |
| value_loss         | 0.003293322   |
-------------------------

--------------------------------------
| approxkl           | 0.0009341966  |
| clipfrac           | 0.0092968745  |
| explained_variance | 0.68          |
| fps                | 1170          |
| n_updates          | 1276          |
| policy_entropy     | 0.06143097    |
| policy_loss        | -0.0010457672 |
| serial_timesteps   | 4083200       |
| time_elapsed       | 3.9e+03       |
| total_timesteps    | 4083200       |
| value_loss         | 0.0008032517  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00086227176 |
| clipfrac           | 0.0078125     |
| explained_variance | 0.647         |
| fps                | 1160          |
| n_updates          | 1277          |
| policy_entropy     | 0.057927765   |
| policy_loss        | -0.0012179065 |
| serial_timesteps   | 4086400       |
| time_elapsed       | 3.9e+03       |
| total_timesteps    | 4086400       |
| value_loss         | 0.0008658821  |
-------------------------

--------------------------------------
| approxkl           | 0.0016472822  |
| clipfrac           | 0.015703125   |
| explained_variance | 0.499         |
| fps                | 924           |
| n_updates          | 1293          |
| policy_entropy     | 0.076913595   |
| policy_loss        | -0.0014612947 |
| serial_timesteps   | 4137600       |
| time_elapsed       | 3.95e+03      |
| total_timesteps    | 4137600       |
| value_loss         | 0.0015686107  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0014894374  |
| clipfrac           | 0.013671875   |
| explained_variance | 0.654         |
| fps                | 1005          |
| n_updates          | 1294          |
| policy_entropy     | 0.082029216   |
| policy_loss        | -0.0013202745 |
| serial_timesteps   | 4140800       |
| time_elapsed       | 3.95e+03      |
| total_timesteps    | 4140800       |
| value_loss         | 0.0010181229  |
-------------------------

--------------------------------------
| approxkl           | 0.0070914524  |
| clipfrac           | 0.016484376   |
| explained_variance | 0.382         |
| fps                | 928           |
| n_updates          | 1310          |
| policy_entropy     | 0.061517384   |
| policy_loss        | -0.0026333716 |
| serial_timesteps   | 4192000       |
| time_elapsed       | 4e+03         |
| total_timesteps    | 4192000       |
| value_loss         | 0.005248013   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00047201203 |
| clipfrac           | 0.005859375   |
| explained_variance | 0.179         |
| fps                | 963           |
| n_updates          | 1311          |
| policy_entropy     | 0.058891725   |
| policy_loss        | -0.0003824731 |
| serial_timesteps   | 4195200       |
| time_elapsed       | 4e+03         |
| total_timesteps    | 4195200       |
| value_loss         | 0.0025598425  |
-------------------------

--------------------------------------
| approxkl           | 0.00058332435 |
| clipfrac           | 0.0052343747  |
| explained_variance | 0.561         |
| fps                | 1020          |
| n_updates          | 1327          |
| policy_entropy     | 0.053230286   |
| policy_loss        | -0.0010373534 |
| serial_timesteps   | 4246400       |
| time_elapsed       | 4.05e+03      |
| total_timesteps    | 4246400       |
| value_loss         | 0.0011018253  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015871166  |
| clipfrac           | 0.016171874   |
| explained_variance | 0.606         |
| fps                | 996           |
| n_updates          | 1328          |
| policy_entropy     | 0.058723204   |
| policy_loss        | -0.0007829969 |
| serial_timesteps   | 4249600       |
| time_elapsed       | 4.06e+03      |
| total_timesteps    | 4249600       |
| value_loss         | 0.0009788933  |
-------------------------

--------------------------------------
| approxkl           | 0.0010740014  |
| clipfrac           | 0.01078125    |
| explained_variance | 0.692         |
| fps                | 1080          |
| n_updates          | 1344          |
| policy_entropy     | 0.061896354   |
| policy_loss        | -0.0014130624 |
| serial_timesteps   | 4300800       |
| time_elapsed       | 4.11e+03      |
| total_timesteps    | 4300800       |
| value_loss         | 0.0009631227  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0013838708   |
| clipfrac           | 0.012890625    |
| explained_variance | 0.66           |
| fps                | 1092           |
| n_updates          | 1345           |
| policy_entropy     | 0.058350153    |
| policy_loss        | -0.00072210055 |
| serial_timesteps   | 4304000        |
| time_elapsed       | 4.11e+03       |
| total_timesteps    | 4304000        |
| value_loss         | 0.0008936217   |
-------------

--------------------------------------
| approxkl           | 0.0011541483  |
| clipfrac           | 0.0140625     |
| explained_variance | 0.64          |
| fps                | 1108          |
| n_updates          | 1361          |
| policy_entropy     | 0.07505053    |
| policy_loss        | -0.0010783136 |
| serial_timesteps   | 4355200       |
| time_elapsed       | 4.16e+03      |
| total_timesteps    | 4355200       |
| value_loss         | 0.00093458436 |
--------------------------------------
--------------------------------------
| approxkl           | 0.001795124   |
| clipfrac           | 0.017109375   |
| explained_variance | 0.62          |
| fps                | 1032          |
| n_updates          | 1362          |
| policy_entropy     | 0.077789485   |
| policy_loss        | -0.0011243565 |
| serial_timesteps   | 4358400       |
| time_elapsed       | 4.16e+03      |
| total_timesteps    | 4358400       |
| value_loss         | 0.008211323   |
-------------------------

---------------------------------------
| approxkl           | 0.0013095329   |
| clipfrac           | 0.0140625      |
| explained_variance | 0.67           |
| fps                | 1092           |
| n_updates          | 1378           |
| policy_entropy     | 0.07575707     |
| policy_loss        | -0.00044574455 |
| serial_timesteps   | 4409600        |
| time_elapsed       | 4.21e+03       |
| total_timesteps    | 4409600        |
| value_loss         | 0.0008171764   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0008069983  |
| clipfrac           | 0.008203125   |
| explained_variance | 0.534         |
| fps                | 1096          |
| n_updates          | 1379          |
| policy_entropy     | 0.08025496    |
| policy_loss        | -0.0015412467 |
| serial_timesteps   | 4412800       |
| time_elapsed       | 4.21e+03      |
| total_timesteps    | 4412800       |
| value_loss         | 0.0039187204  |
------------

--------------------------------------
| approxkl           | 0.001477682   |
| clipfrac           | 0.013046875   |
| explained_variance | 0.669         |
| fps                | 1066          |
| n_updates          | 1395          |
| policy_entropy     | 0.078394994   |
| policy_loss        | -0.0027566226 |
| serial_timesteps   | 4464000       |
| time_elapsed       | 4.26e+03      |
| total_timesteps    | 4464000       |
| value_loss         | 0.011112707   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0018909777  |
| clipfrac           | 0.00984375    |
| explained_variance | 0.46          |
| fps                | 1100          |
| n_updates          | 1396          |
| policy_entropy     | 0.079907164   |
| policy_loss        | -0.0019016084 |
| serial_timesteps   | 4467200       |
| time_elapsed       | 4.26e+03      |
| total_timesteps    | 4467200       |
| value_loss         | 0.0036359932  |
-------------------------

--------------------------------------
| approxkl           | 0.0029206998  |
| clipfrac           | 0.022265626   |
| explained_variance | 0.416         |
| fps                | 945           |
| n_updates          | 1412          |
| policy_entropy     | 0.06921765    |
| policy_loss        | -0.0015496593 |
| serial_timesteps   | 4518400       |
| time_elapsed       | 4.31e+03      |
| total_timesteps    | 4518400       |
| value_loss         | 0.005796147   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00034671652 |
| clipfrac           | 0.004453125   |
| explained_variance | 0.37          |
| fps                | 923           |
| n_updates          | 1413          |
| policy_entropy     | 0.060154833   |
| policy_loss        | -0.0010117167 |
| serial_timesteps   | 4521600       |
| time_elapsed       | 4.31e+03      |
| total_timesteps    | 4521600       |
| value_loss         | 0.0016313714  |
-------------------------

--------------------------------------
| approxkl           | 0.00082492677 |
| clipfrac           | 0.008125      |
| explained_variance | 0.674         |
| fps                | 1206          |
| n_updates          | 1429          |
| policy_entropy     | 0.057825163   |
| policy_loss        | -0.0008634012 |
| serial_timesteps   | 4572800       |
| time_elapsed       | 4.36e+03      |
| total_timesteps    | 4572800       |
| value_loss         | 0.0007860003  |
--------------------------------------
---------------------------------------
| approxkl           | 0.00086979824  |
| clipfrac           | 0.009765625    |
| explained_variance | 0.686          |
| fps                | 1173           |
| n_updates          | 1430           |
| policy_entropy     | 0.057233937    |
| policy_loss        | -0.00083567784 |
| serial_timesteps   | 4576000        |
| time_elapsed       | 4.36e+03       |
| total_timesteps    | 4576000        |
| value_loss         | 0.0007798518   |
-------------

--------------------------------------
| approxkl           | 0.00089158944 |
| clipfrac           | 0.00984375    |
| explained_variance | 0.513         |
| fps                | 903           |
| n_updates          | 1446          |
| policy_entropy     | 0.05447632    |
| policy_loss        | -0.0007188362 |
| serial_timesteps   | 4627200       |
| time_elapsed       | 4.41e+03      |
| total_timesteps    | 4627200       |
| value_loss         | 0.0013497814  |
--------------------------------------
--------------------------------------
| approxkl           | 0.002976626   |
| clipfrac           | 0.015234374   |
| explained_variance | 0.117         |
| fps                | 885           |
| n_updates          | 1447          |
| policy_entropy     | 0.05666817    |
| policy_loss        | -0.0028677345 |
| serial_timesteps   | 4630400       |
| time_elapsed       | 4.41e+03      |
| total_timesteps    | 4630400       |
| value_loss         | 0.0064552473  |
-------------------------

--------------------------------------
| approxkl           | 0.0006310143  |
| clipfrac           | 0.006015625   |
| explained_variance | 0.593         |
| fps                | 890           |
| n_updates          | 1463          |
| policy_entropy     | 0.05217827    |
| policy_loss        | -0.0014634221 |
| serial_timesteps   | 4681600       |
| time_elapsed       | 4.47e+03      |
| total_timesteps    | 4681600       |
| value_loss         | 0.0010230034  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009996919  |
| clipfrac           | 0.010234375   |
| explained_variance | 0.642         |
| fps                | 928           |
| n_updates          | 1464          |
| policy_entropy     | 0.053021356   |
| policy_loss        | -0.0010359434 |
| serial_timesteps   | 4684800       |
| time_elapsed       | 4.47e+03      |
| total_timesteps    | 4684800       |
| value_loss         | 0.0008734951  |
-------------------------

--------------------------------------
| approxkl           | 0.0012658963  |
| clipfrac           | 0.01109375    |
| explained_variance | 0.299         |
| fps                | 879           |
| n_updates          | 1480          |
| policy_entropy     | 0.05784855    |
| policy_loss        | -0.0021344023 |
| serial_timesteps   | 4736000       |
| time_elapsed       | 4.53e+03      |
| total_timesteps    | 4736000       |
| value_loss         | 0.007509286   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0002853623  |
| clipfrac           | 0.003671875   |
| explained_variance | 0.701         |
| fps                | 1013          |
| n_updates          | 1481          |
| policy_entropy     | 0.051936988   |
| policy_loss        | -0.0006634181 |
| serial_timesteps   | 4739200       |
| time_elapsed       | 4.53e+03      |
| total_timesteps    | 4739200       |
| value_loss         | 0.0009014852  |
-------------------------

--------------------------------------
| approxkl           | 0.0019190016  |
| clipfrac           | 0.013515625   |
| explained_variance | 0.638         |
| fps                | 1012          |
| n_updates          | 1497          |
| policy_entropy     | 0.065653466   |
| policy_loss        | -0.0011401951 |
| serial_timesteps   | 4790400       |
| time_elapsed       | 4.58e+03      |
| total_timesteps    | 4790400       |
| value_loss         | 0.0009052369  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0021337625  |
| clipfrac           | 0.016640624   |
| explained_variance | 0.677         |
| fps                | 919           |
| n_updates          | 1498          |
| policy_entropy     | 0.07104121    |
| policy_loss        | -0.0016622843 |
| serial_timesteps   | 4793600       |
| time_elapsed       | 4.59e+03      |
| total_timesteps    | 4793600       |
| value_loss         | 0.0008169988  |
-------------------------

---------------------------------------
| approxkl           | 0.00081642356  |
| clipfrac           | 0.006953125    |
| explained_variance | -0.00783       |
| fps                | 946            |
| n_updates          | 1514           |
| policy_entropy     | 0.07298763     |
| policy_loss        | -0.00067769457 |
| serial_timesteps   | 4844800        |
| time_elapsed       | 4.64e+03       |
| total_timesteps    | 4844800        |
| value_loss         | 0.002345271    |
---------------------------------------
--------------------------------------
| approxkl           | 0.002078819   |
| clipfrac           | 0.018437501   |
| explained_variance | 0.324         |
| fps                | 968           |
| n_updates          | 1515          |
| policy_entropy     | 0.069484696   |
| policy_loss        | -0.0013102196 |
| serial_timesteps   | 4848000       |
| time_elapsed       | 4.65e+03      |
| total_timesteps    | 4848000       |
| value_loss         | 0.0019535169  |
------------

---------------------------------------
| approxkl           | 0.0020784016   |
| clipfrac           | 0.021484375    |
| explained_variance | 0.676          |
| fps                | 1167           |
| n_updates          | 1531           |
| policy_entropy     | 0.07045511     |
| policy_loss        | -0.00062763534 |
| serial_timesteps   | 4899200        |
| time_elapsed       | 4.69e+03       |
| total_timesteps    | 4899200        |
| value_loss         | 0.0007966877   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0009899     |
| clipfrac           | 0.008125      |
| explained_variance | 0.662         |
| fps                | 1184          |
| n_updates          | 1532          |
| policy_entropy     | 0.06509351    |
| policy_loss        | -0.0010804622 |
| serial_timesteps   | 4902400       |
| time_elapsed       | 4.7e+03       |
| total_timesteps    | 4902400       |
| value_loss         | 0.0008623192  |
------------

--------------------------------------
| approxkl           | 0.0012108323  |
| clipfrac           | 0.01234375    |
| explained_variance | 0.636         |
| fps                | 1121          |
| n_updates          | 1548          |
| policy_entropy     | 0.04988206    |
| policy_loss        | -0.0015576635 |
| serial_timesteps   | 4953600       |
| time_elapsed       | 4.74e+03      |
| total_timesteps    | 4953600       |
| value_loss         | 0.0026686706  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0008242439  |
| clipfrac           | 0.00921875    |
| explained_variance | 0.696         |
| fps                | 1144          |
| n_updates          | 1549          |
| policy_entropy     | 0.047899596   |
| policy_loss        | -0.0009869216 |
| serial_timesteps   | 4956800       |
| time_elapsed       | 4.75e+03      |
| total_timesteps    | 4956800       |
| value_loss         | 0.0007848902  |
-------------------------

In [80]:
# model_lesson5.save(model_names[5])

## Testing lesson5

In [86]:
# model_lesson5 = PPO2.load(model_names[5])
test(env_lesson5, model_lesson5, render = False)

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Win  5 / 5  games
Tie  0 / 5  games
Lose  0 / 5  games


In [None]:
model_lesson5.save(model_names[5])