# Pommerman Demo.

This notebook demonstrates how to train Pommerman agents. Please let us know at support@pommerman.com if you run into any issues.

In [408]:
import os
import sys
import numpy as np
import time

import pommerman
from pommerman.agents import SimpleAgent, RandomAgent, PlayerAgent, BaseAgent
from pommerman.configs import ffa_v0_fast_env
from pommerman.envs.v0 import Pomme as Pomme_v0
from pommerman.characters import Bomber
from pommerman import utility
from pommerman import agents
from pommerman import envs
from pommerman import constants
from pommerman import characters
from pommerman import configs

# print all env configs
print(pommerman.REGISTRY)

['AdvancedLesson-v0', 'PommeFFACompetition-v0', 'PommeFFACompetitionFast-v0', 'PommeFFAFast-v0', 'PommeFFA-v1', 'PommeFFAFast-v3', 'PommeFFAFast-v4', 'Lesson1-v0', 'Lesson2-v0', 'Lesson2b-v0', 'Lesson2c-v0', 'Lesson2d-v0', 'Lesson2e-v0', 'Lesson3-v0', 'Lesson3b-v0', 'Lesson3c-v0', 'Lesson3d-v0', 'OneVsOne-v0', 'PommeRadioCompetition-v2', 'PommeRadio-v2', 'Simple-v0', 'SimpleRandomTeam-v0', 'SimpleTeam-v0', 'PommeTeamCompetition-v0', 'PommeTeamCompetitionFast-v0', 'PommeTeamCompetition-v1', 'PommeTeam-v0', 'PommeTeamFast-v0', 'PommeTeamSimple-v0']


# Train with stable baseline

In [409]:
import gym
from gym import spaces
import tensorflow as tf
from stable_baselines.a2c.utils import linear
from stable_baselines.common.policies import ActorCriticPolicy, MlpPolicy, CnnPolicy, FeedForwardPolicy
from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines import PPO2

## Inherit pommerman env and make it compatible with stable-baseline

In [474]:
class CustomPomme(Pomme_v0):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.obs_raw = None # store the raw version of observation
        self.training_idx = 1 # idx of the agent being trained
    
    # function to flatten pommerman observation
    def _transform_obs(self, obs_raw):
        obs_training = obs_raw[self.training_idx] # default the first agent to be trained

        # construct flattened observation
        obs = [
            *np.array(obs_training["board"]).reshape(-1),
            *np.array(obs_training["bomb_blast_strength"]).reshape(-1),
            *np.array(obs_training["bomb_life"]).reshape(-1),
            *np.array(obs_training["position"]).reshape(-1),
            obs_training["ammo"],
            obs_training["blast_strength"],
            obs_training["can_kick"],
            obs_training["teammate"].value,
            obs_training["enemies"][0].value,
            
            # uncommon if training 1 v 1
            obs_training["enemies"][0].value,
            obs_training["enemies"][0].value,
            
            # uncommon if training 2 v 2
#             obs_training["enemies"][1].value,
#             obs_training["enemies"][2].value,
        ]
        return obs
    
    def get_obs_raw(self):
        return self.obs_raw

    def step(self, action_training):
        action_nontraining = self.act(self.obs_raw)
        actions = [*action_nontraining, action_training]
        obs_raw, reward, done, info = super().step(actions)
        self.obs_raw = obs_raw
        return self._transform_obs(obs_raw), reward[self.training_idx], done, info
    
    def reset(self):
        obs_raw = super().reset()
        self.obs_raw = obs_raw
        return self._transform_obs(obs_raw)
    
    def render(self,
               mode=None,
               close=False,
               record_pngs_dir=None,
               record_json_dir=None,
               do_sleep=True):
        super().render(mode=mode,
                       close=close,
                       record_pngs_dir=record_pngs_dir,
                       record_json_dir=record_json_dir,
                       do_sleep=do_sleep)

In [475]:
class CustomCNN(ActorCriticPolicy):
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
        super(CustomCNN, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse)
        ob_len = ob_space.shape[0]
        size=11
        bp = 3*size**2 #board partition
        with tf.variable_scope("model", reuse=reuse):
            obs = self.processed_obs
            #print("Okay, what's going on?", obs.shape)
            self.board1, self.misc = tf.split(obs, [bp, -1], 1)
            #print("Initial shapes:", self.board1.shape, self.misc.shape)
            
            self.board = tf.reshape(self.board1, (-1, size, size, 3))
            #print("Processed shapes:", self.board.shape, self.misc.shape)
            self.conv1 = tf.layers.conv2d(self.board, 64, 2, activation=tf.nn.relu, name='conv1')
            self.conv2 = tf.layers.conv2d(self.conv1, 32, 2, activation=tf.nn.relu, name='conv2')
            self.fc0 = tf.contrib.layers.flatten(self.conv2)
            #print("fc shapes:", self.fc0.shape, self.misc.shape)
            self.fc1 = tf.concat((self.fc0, self.misc), -1)
            #print("Catted shape", self.fc1.shape)
            self.fc1 = tf.layers.dense(self.fc1, 1024, name = 'fc1')
            self.actions = tf.layers.dense(self.fc1, 6)   
            self.valueUM = tf.layers.dense(self.fc1, 128) #??

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(self.actions, self.valueUM, init_scale=0.01)

        self._value_fn = linear(self.valueUM, 'vf', 1)
        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=False):
        #print(obs)
        #b, c, conv, flat = self.sess.run((self.board, self.conv1, self.conv2, self.fc0), {self.obs_ph:obs})
        #print(b.shape, c.shape, conv.shape)
        if deterministic:
            action, value, neglogp = self.sess.run([self.deterministic_action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        else:
            action, value, neglogp = self.sess.run([self.action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        return action, value, self.initial_state, neglogp

    def proba_step(self, obs, state=None, mask=None):
        return self.sess.run(self.policy_proba, {self.obs_ph: obs})

    def value(self, obs, state=None, mask=None):
        return self.sess.run(self.value_flat, {self.obs_ph: obs})

In [476]:
class CustomCNN2(ActorCriticPolicy):
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
        super(CustomCNN2, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse)
        ob_len = ob_space.shape[0]
        size=11
        bp = 3*size**2 #board partition
        with tf.variable_scope("model", reuse=reuse):
            obs = self.processed_obs
            self.board1, self.misc = tf.split(obs, [bp, -1], 1)
            
            self.board = tf.reshape(self.board1, (-1, size, size, 3))
            self.conv1 = tf.layers.conv2d(self.board, 64, 3, activation=tf.nn.relu, name='conv1')
            self.conv2 = tf.layers.conv2d(self.conv1, 64, 3, activation=tf.nn.relu, name='conv2')
            self.conv3 = tf.layers.conv2d(self.conv2, 64, 3, activation=tf.nn.relu, name='conv3')
            self.conv4 = tf.layers.conv2d(self.conv3, 64, 3, activation=tf.nn.relu, name='conv4')
            self.conv5 = tf.layers.conv2d(self.conv4, 64, 3, activation=tf.nn.relu, name='conv5')
            self.fc0 = tf.contrib.layers.flatten(self.conv5)
            self.fc0 = tf.concat((self.fc0, self.misc), -1)
            self.fc1 = tf.layers.dense(self.fc0, 1024, name = 'fc1')
            self.fc2 = tf.layers.dense(self.fc1, 200, name = 'fc2')
            self.fc3 = tf.layers.dense(self.fc2, 50, name = 'fc3')
            self.actions = tf.layers.dense(self.fc3, 6)   
            self.valueUM = tf.layers.dense(self.fc3, 128) #??

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(self.actions, self.valueUM, init_scale=0.01)

        self._value_fn = linear(self.valueUM, 'vf', 1)
        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=False):
        if deterministic:
            action, value, neglogp = self.sess.run([self.deterministic_action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        else:
            action, value, neglogp = self.sess.run([self.action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        return action, value, self.initial_state, neglogp

    def proba_step(self, obs, state=None, mask=None):
        return self.sess.run(self.policy_proba, {self.obs_ph: obs})

    def value(self, obs, state=None, mask=None):
        return self.sess.run(self.value_flat, {self.obs_ph: obs})

In [477]:
def team_v3_fast_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme2
    game_type = constants.GameType.Team
    env_entry_point = 'CustomPomme'
    env_id = 'PommeTeamFast-v3'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 6,
        'num_rigid': 0,
        'num_wood': 0,
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def one_vs_one_v3_env():
    """Start up a FFA config with the default settings."""
    env = CustomPomme2
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'PommeOneVsOneFast-v3'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 0,
        'num_wood': 0,
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def one_vs_one_v3_wood(wood):
    """Start up a FFA config with the default settings."""
    env = CustomPomme2
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'PommeOneVsOneFast-v3'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 6,
        'num_rigid': 0,
        'num_wood': wood,
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()


def one_vs_one_v3_wood_walls(n):
    """Start up a FFA config with the default settings."""
    env = CustomPomme2
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'CustomPomme'
    env_id = 'PommeOneVsOneFast-v3'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 6,
        'num_rigid': n,
        'num_wood': n,
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': 1000,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

In [478]:
class StaticAgent(BaseAgent):
    def act(self, obs, action_space):
        return 0

In [479]:
class RandomAgentNoBomb(RandomAgent):
    def act(self, obs, action_space):
        action = super().act(obs, action_space)
        if action == 5:
            action = 0
        return action

In [428]:
# Instantiate the environment

config = one_vs_one_v3_env()
env_pom = CustomPomme(**config["env_kwargs"])

# config agents
agents = []

# Add simple agents
for agent_id in range(1):
    agents.append(StaticAgent(config["agent"](agent_id, config["game_type"])))
    
# add player agent(to train)
agents.append(PlayerAgent(config["agent"](1, config["game_type"])))

env_pom.set_agents(agents)
env_pom.set_training_agent(agents[1].agent_id)
env_pom.set_init_game_state(None)

# Seed and reset the environment
env_pom.seed(0)

[0]

In [429]:
# log function during training, implement if needed
def log(local_var, global_var):
    pass
#     display(local_var)
#     display(global_var)

In [430]:
n_cpu = 1
env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])

model = PPO2(CustomCNN, env, verbose=1, 
             n_steps = 3000, # batch_size = n_step * num_env
             ent_coef = 0.001, # entropy coefficient
             tensorboard_log="./ppo_pommerman_tensorboard/")
#model.load("ppo2_pommerman_500000_2")
model = model.learn(total_timesteps=5000, # num_update = total_timesteps // batch_size
                    callback = log)
model.save("ppo2_pommerman_500000_2")



--------------------------------------
| approxkl           | 1.707004e-05  |
| clipfrac           | 0.0           |
| explained_variance | -0.677        |
| fps                | 343           |
| n_updates          | 1             |
| policy_entropy     | 1.7917039     |
| policy_loss        | -0.0003792994 |
| serial_timesteps   | 3000          |
| time_elapsed       | 1.67e-06      |
| total_timesteps    | 3000          |
| value_loss         | 0.18028317    |
--------------------------------------


In [481]:
def train(env, path, timesteps):
    n_cpu = 1
    #env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])

    model = PPO2(CustomCNN, env, verbose=1, 
                 n_steps = 3000, # batch_size = n_step * num_env
                 ent_coef = 0.001, # entropy coefficient
                 tensorboard_log="./ppo_pommerman_tensorboard/")
    try:
        model.load(path)
    except ValueError: 
        pass
    model = model.learn(total_timesteps=timesteps, # num_update = total_timesteps // batch_size
                        callback = log)
    model.save(path)
    return model
    
#render: 0=no, 1=first game, 2=all
def validate(env, path, total, render=0):
    # del model # remove to demonstrate saving and loading
    model = PPO2.load(path)

    n_cpu = 1
    #env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])
    model.envs = env

    # test the learned model
    num_win = 0
    num_tie = 0
    num_lose = 0
    for i_episode in range(total):
        obs = env.reset()
        done = False
        info = None
        while not done:
            if render == 2 or (render == 1 and i_episode==0): env.render()
                
            action_training, _states = model.predict(obs)
    #         print(action_training)
            obs, rewards, dones, infos = env.step(action_training)
    #         print(infos)
            done = dones[0]
            info = infos[0]
            time.sleep(0.1)
        print('Episode {} finished'.format(i_episode))
        if(info["result"].value == 0):
            if(1 in info["winners"]):
                num_win+=1
            else:
                num_lose+=1
        elif(info["result"].value == 2):
            num_tie+=1
    #     print(info)
    env.close()
    print("Win ", num_win, "/", total, " games")
    print("Tie ", num_tie, "/", total, " games")
    print("Lose ", num_lose, "/", total, " games")
    return num_win, num_tie, num_lose

In [482]:
def define_env(config, agent, n_cpu=1):
    env_pom = CustomPomme(**config["env_kwargs"])

    # config agents
    agents = []

    # Add simple agents
    for agent_id in range(1):
        if agent == 'static':
            agents.append(StaticAgent(config["agent"](agent_id, config["game_type"])))
        elif agent == 'random':
            agents.append(RandomAgentNoBomb(config["agent"](agent_id, config["game_type"])))
        elif agent == 'simple':
            agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

    # add player agent(to train)
    agents.append(PlayerAgent(config["agent"](1, config["game_type"])))

    env_pom.set_agents(agents)
    env_pom.set_training_agent(agents[1].agent_id)
    env_pom.set_init_game_state(None)

    # Seed and reset the environment
    env_pom.seed(0)

    env = DummyVecEnv([lambda: env_pom for i in range(n_cpu)])
    return env

In [483]:
def lesson1_env():
    """Lesson 1-blank training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson1-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 0,
        'num_wood': 0,
        'num_items': 0,
        'max_steps': 200,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson2_env():
    """Lesson 2-box training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson2-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 0,
        'num_wood': 4,
        'num_items': 0,
        'max_steps': 200,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson2b_env():
    """Lesson 2b-box training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson2b-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 0,
        'num_wood': 8,
        'num_items': 0,
        'max_steps': 200,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson2c_env():
    """Lesson 2c-box training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson2c-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 0,
        'num_wood': 16,
        'num_items': 0,
        'max_steps': 300, #increasing timesteps to reduce reward sparsity per more challenging game
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson2d_env():
    """Lesson 2d-box training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson2d-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 0,
        'num_wood': constants.NUM_WOOD,
        'num_items': 0,
        'max_steps': 300,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson2e_env():
    """Lesson 2e-box training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson2e-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 0,
        'num_wood': 72,
        'num_items': 0,
        'max_steps': 300,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson3_env():
    """Lesson 3-rigid training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson3-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 4,
        'num_wood': constants.NUM_WOOD,
        'num_items': 0,
        'max_steps': 300,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson3b_env():
    """Lesson 3b-rigid training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson3b-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 8,
        'num_wood': constants.NUM_WOOD,
        'num_items': 0,
        'max_steps': 300,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson3c_env():
    """Lesson 3c-rigid training config."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson3c-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': 16,
        'num_wood': constants.NUM_WOOD,
        'num_items': 0,
        'max_steps': 400, #increasing timesteps due to increasingly difficult environment
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def lesson3d_env():
    """Lesson 3d-rigid training config. Also used for lessons 4-7."""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'Lesson3d-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': constants.NUM_RIGID,
        'num_wood': constants.NUM_WOOD,
        'num_items': 0,
        'max_steps': 400,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

def advanced_lesson_env():
    """Lesson 4+ training environment. Full number of timesteps"""
    env = envs.v0.Pomme
    game_type = constants.GameType.OneVsOne
    env_entry_point = 'pommerman.envs.v0:Pomme'
    env_id = 'AdvancedLesson-v0'
    env_kwargs = {
        'game_type': game_type,
        'board_size': 11,
        'num_rigid': constants.NUM_RIGID,
        'num_wood': constants.NUM_WOOD,
        'num_items': 0,
        'max_steps': constants.MAX_STEPS,
        'render_fps': constants.RENDER_FPS,
        'rand_agent_pos': False,
        'env': env_entry_point,
    }
    agent = characters.Bomber
    return locals()

In [484]:
conf1 = lesson1_env()
conf2 = lesson2_env()
conf3 = lesson2b_env()
conf4 = lesson2c_env()
conf5 = lesson2d_env()
conf6 = lesson2e_env()
conf7 = lesson3_env()
conf8 = lesson3b_env()
conf9 = lesson3c_env()
conf10 = lesson3d_env()
conf11 = advanced_lesson_env()

env1 = define_env(conf1, 'static')
env2 = define_env(conf2, 'static')
env3 = define_env(conf3, 'static')
env4 = define_env(conf4, 'static')
env5 = define_env(conf5, 'static')
env6 = define_env(conf6, 'static')
env7 = define_env(conf7, 'static')
env8 = define_env(conf8, 'static')
env9 = define_env(conf9, 'static')
env10 = define_env(conf10, 'static')
env11 = define_env(conf11, 'random')

env_list = [env1, env2, env3, env4, env5, env6, env7, env8, env9, env10, env11]

env_names = ['lesson1',
             'lesson2',
             'lesson2b',
             'lesson2c',
             'lesson2d',
             'lesson2e', 
             'lesson3',
             'lesson3b',
             'lesson3c',
             'lesson3d',
             'lesson4']

In [485]:
#No tansfers to establish a baseline
validate_num = 20

history = []
first = True

for i in range(len(env_list)):
    
    env = env_list[i]
    path = 'models/'+env_names[i]
    wins = []
    draws = []
    losses = []
    win = 0
    #if (not first):
    #    model.save(path)
    j=0
    while win < validate_num * .95:
        j+=1
        start = time.time()
        record = open('log.txt', 'a')
        print("Training", path)
        
        model = train(env, path, 500000)
        win, draw, loss = validate(env, path, validate_num)
        
        record.write(path +": "+ str(win) + " " + str(draw) +  " " + str(loss) + '\n')
        wins.append(win)
        draws.append(draw)
        losses.append(loss)
        record.close()
        print("1 epoch training time:", time.time() - start)
    history.append((wins, draws, losses))
    first = False




Training models/lesson1


Loading a model without an environment, this model cannot be trained until it has a valid environment.


--------------------------------------
| approxkl           | 1.1779324e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.2          |
| fps                | 357           |
| n_updates          | 1             |
| policy_entropy     | 1.7917386     |
| policy_loss        | -0.000351576  |
| serial_timesteps   | 3000          |
| time_elapsed       | 2.38e-06      |
| total_timesteps    | 3000          |
| value_loss         | 0.25519428    |
--------------------------------------
---------------------------------------
| approxkl           | 3.766695e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.208         |
| fps                | 316            |
| n_updates          | 2              |
| policy_entropy     | 1.7916179      |
| policy_loss        | -0.00066368334 |
| serial_timesteps   | 6000           |
| time_elapsed       | 8.4            |
| total_timesteps    | 6000           |
| value_loss         | 0.13115527     |
-------------

--------------------------------------
| approxkl           | 0.002657196   |
| clipfrac           | 0.0           |
| explained_variance | 0.021         |
| fps                | 338           |
| n_updates          | 4             |
| policy_entropy     | 1.7826461     |
| policy_loss        | -0.0073390757 |
| serial_timesteps   | 12000         |
| time_elapsed       | 28.6          |
| total_timesteps    | 12000         |
| value_loss         | 0.06321494    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0064375564  |
| clipfrac           | 0.07333334    |
| explained_variance | -0.0607       |
| fps                | 342           |
| n_updates          | 5             |
| policy_entropy     | 1.7424164     |
| policy_loss        | -0.0071399594 |
| serial_timesteps   | 15000         |
| time_elapsed       | 37.5          |
| total_timesteps    | 15000         |
| value_loss         | 0.07347415    |
-------------------------

--------------------------------------
| approxkl           | 0.0024774075  |
| clipfrac           | 0.014583333   |
| explained_variance | 0.329         |
| fps                | 370           |
| n_updates          | 21            |
| policy_entropy     | 1.4914416     |
| policy_loss        | -0.0018920971 |
| serial_timesteps   | 63000         |
| time_elapsed       | 174           |
| total_timesteps    | 63000         |
| value_loss         | 0.057061404   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006890861  |
| clipfrac           | 0.00925       |
| explained_variance | 0.246         |
| fps                | 359           |
| n_updates          | 22            |
| policy_entropy     | 1.4750359     |
| policy_loss        | -0.0017461837 |
| serial_timesteps   | 66000         |
| time_elapsed       | 182           |
| total_timesteps    | 66000         |
| value_loss         | 0.058377046   |
-------------------------

--------------------------------------
| approxkl           | 0.0019729824  |
| clipfrac           | 0.013166666   |
| explained_variance | 0.0622        |
| fps                | 347           |
| n_updates          | 38            |
| policy_entropy     | 1.1020579     |
| policy_loss        | -0.0024373052 |
| serial_timesteps   | 114000        |
| time_elapsed       | 320           |
| total_timesteps    | 114000        |
| value_loss         | 0.02151162    |
--------------------------------------
-------------------------------------
| approxkl           | 0.005355491  |
| clipfrac           | 0.057583336  |
| explained_variance | 0.3          |
| fps                | 326          |
| n_updates          | 39           |
| policy_entropy     | 1.0760415    |
| policy_loss        | -0.003506398 |
| serial_timesteps   | 117000       |
| time_elapsed       | 328          |
| total_timesteps    | 117000       |
| value_loss         | 0.023612697  |
-------------------------------------

-------------------------------------
| approxkl           | 0.0045385063 |
| clipfrac           | 0.043833334  |
| explained_variance | 0.656        |
| fps                | 354          |
| n_updates          | 55           |
| policy_entropy     | 0.7470024    |
| policy_loss        | -0.003833119 |
| serial_timesteps   | 165000       |
| time_elapsed       | 463          |
| total_timesteps    | 165000       |
| value_loss         | 0.007699794  |
-------------------------------------
--------------------------------------
| approxkl           | 0.007721597   |
| clipfrac           | 0.08116666    |
| explained_variance | 0.717         |
| fps                | 349           |
| n_updates          | 56            |
| policy_entropy     | 0.7116396     |
| policy_loss        | -0.0058268765 |
| serial_timesteps   | 168000        |
| time_elapsed       | 472           |
| total_timesteps    | 168000        |
| value_loss         | 0.008100372   |
--------------------------------------

-------------------------------------
| approxkl           | 0.0050269943 |
| clipfrac           | 0.067666665  |
| explained_variance | 0.389        |
| fps                | 329          |
| n_updates          | 72           |
| policy_entropy     | 0.54549116   |
| policy_loss        | -0.003872762 |
| serial_timesteps   | 216000       |
| time_elapsed       | 618          |
| total_timesteps    | 216000       |
| value_loss         | 0.009551795  |
-------------------------------------
--------------------------------------
| approxkl           | 0.007479806   |
| clipfrac           | 0.08433333    |
| explained_variance | 0.323         |
| fps                | 338           |
| n_updates          | 73            |
| policy_entropy     | 0.51523376    |
| policy_loss        | -0.0029400554 |
| serial_timesteps   | 219000        |
| time_elapsed       | 627           |
| total_timesteps    | 219000        |
| value_loss         | 0.009756986   |
--------------------------------------

--------------------------------------
| approxkl           | 0.006115529   |
| clipfrac           | 0.074083336   |
| explained_variance | 0.408         |
| fps                | 335           |
| n_updates          | 89            |
| policy_entropy     | 0.45636418    |
| policy_loss        | -0.0028130214 |
| serial_timesteps   | 267000        |
| time_elapsed       | 771           |
| total_timesteps    | 267000        |
| value_loss         | 0.0027406549  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0022392632   |
| clipfrac           | 0.029          |
| explained_variance | -0.55          |
| fps                | 316            |
| n_updates          | 90             |
| policy_entropy     | 0.43727875     |
| policy_loss        | -0.00090864784 |
| serial_timesteps   | 270000         |
| time_elapsed       | 780            |
| total_timesteps    | 270000         |
| value_loss         | 0.006479005    |
-------------

--------------------------------------
| approxkl           | 0.0033637942  |
| clipfrac           | 0.041833334   |
| explained_variance | 0.133         |
| fps                | 341           |
| n_updates          | 106           |
| policy_entropy     | 0.36866522    |
| policy_loss        | -0.0016545457 |
| serial_timesteps   | 318000        |
| time_elapsed       | 923           |
| total_timesteps    | 318000        |
| value_loss         | 0.0026810267  |
--------------------------------------
--------------------------------------
| approxkl           | 0.008995292   |
| clipfrac           | 0.097500004   |
| explained_variance | 0.472         |
| fps                | 342           |
| n_updates          | 107           |
| policy_entropy     | 0.36090943    |
| policy_loss        | -0.0044869585 |
| serial_timesteps   | 321000        |
| time_elapsed       | 932           |
| total_timesteps    | 321000        |
| value_loss         | 0.006618371   |
-------------------------

-------------------------------------
| approxkl           | 0.004475286  |
| clipfrac           | 0.047916666  |
| explained_variance | 0.875        |
| fps                | 340          |
| n_updates          | 123          |
| policy_entropy     | 0.25479954   |
| policy_loss        | -0.002407164 |
| serial_timesteps   | 369000       |
| time_elapsed       | 1.07e+03     |
| total_timesteps    | 369000       |
| value_loss         | 0.002022827  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0032360882  |
| clipfrac           | 0.042999998   |
| explained_variance | 0.613         |
| fps                | 334           |
| n_updates          | 124           |
| policy_entropy     | 0.25308022    |
| policy_loss        | -8.123662e-05 |
| serial_timesteps   | 372000        |
| time_elapsed       | 1.08e+03      |
| total_timesteps    | 372000        |
| value_loss         | 0.00087648875 |
--------------------------------------

--------------------------------------
| approxkl           | 0.0035912117  |
| clipfrac           | 0.03975       |
| explained_variance | 0.458         |
| fps                | 298           |
| n_updates          | 140           |
| policy_entropy     | 0.20151706    |
| policy_loss        | -0.0010181998 |
| serial_timesteps   | 420000        |
| time_elapsed       | 1.22e+03      |
| total_timesteps    | 420000        |
| value_loss         | 0.0042502373  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0026575478  |
| clipfrac           | 0.024833333   |
| explained_variance | 0.94          |
| fps                | 273           |
| n_updates          | 141           |
| policy_entropy     | 0.18598351    |
| policy_loss        | -0.003251094  |
| serial_timesteps   | 423000        |
| time_elapsed       | 1.23e+03      |
| total_timesteps    | 423000        |
| value_loss         | 0.00078220526 |
-------------------------

--------------------------------------
| approxkl           | 0.0041078003  |
| clipfrac           | 0.04741667    |
| explained_variance | 0.953         |
| fps                | 336           |
| n_updates          | 157           |
| policy_entropy     | 0.27006555    |
| policy_loss        | -0.0038528147 |
| serial_timesteps   | 471000        |
| time_elapsed       | 1.39e+03      |
| total_timesteps    | 471000        |
| value_loss         | 0.0029682799  |
--------------------------------------
-------------------------------------
| approxkl           | 0.006348513  |
| clipfrac           | 0.08308333   |
| explained_variance | -0.127       |
| fps                | 349          |
| n_updates          | 158          |
| policy_entropy     | 0.28149515   |
| policy_loss        | -0.004766254 |
| serial_timesteps   | 474000       |
| time_elapsed       | 1.4e+03      |
| total_timesteps    | 474000       |
| value_loss         | 0.005327908  |
-------------------------------------



Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  20 / 20  games
Tie  0 / 20  games
Lose  0 / 20  games
1 epoch training time: 1537.9369552135468
Training models/lesson2


--------------------------------------
| approxkl           | 1.7501368e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.145        |
| fps                | 337           |
| n_updates          | 1             |
| policy_entropy     | 1.7917117     |
| policy_loss        | -0.0004581386 |
| serial_timesteps   | 3000          |
| time_elapsed       | 2.38e-06      |
| total_timesteps    | 3000          |
| value_loss         | 0.34369656    |
--------------------------------------
--------------------------------------
| approxkl           | 9.997129e-05  |
| clipfrac           | 0.0           |
| explained_variance | -0.0224       |
| fps                | 345           |
| n_updates          | 2             |
| policy_entropy     | 1.7913617     |
| policy_loss        | -0.0011589581 |
| serial_timesteps   | 6000          |
| time_elapsed       | 8.91          |
| total_timesteps    | 6000          |
| value_loss         | 0.12566794    |
-------------------------

---------------------------------------
| approxkl           | 0.0001322171   |
| clipfrac           | 0.0            |
| explained_variance | 0.215          |
| fps                | 342            |
| n_updates          | 9              |
| policy_entropy     | 1.581059       |
| policy_loss        | -0.00049979414 |
| serial_timesteps   | 27000          |
| time_elapsed       | 68.8           |
| total_timesteps    | 27000          |
| value_loss         | 0.054391697    |
---------------------------------------
---------------------------------------
| approxkl           | 0.00032858236  |
| clipfrac           | 0.0            |
| explained_variance | 0.245          |
| fps                | 345            |
| n_updates          | 10             |
| policy_entropy     | 1.5744556      |
| policy_loss        | -0.00068711897 |
| serial_timesteps   | 30000          |
| time_elapsed       | 77.5           |
| total_timesteps    | 30000          |
| value_loss         | 0.059612487    |


--------------------------------------
| approxkl           | 0.001237069   |
| clipfrac           | 0.011583332   |
| explained_variance | 0.302         |
| fps                | 344           |
| n_updates          | 26            |
| policy_entropy     | 1.4446653     |
| policy_loss        | -0.0019264665 |
| serial_timesteps   | 78000         |
| time_elapsed       | 214           |
| total_timesteps    | 78000         |
| value_loss         | 0.03787793    |
--------------------------------------
-------------------------------------
| approxkl           | 0.0022493114 |
| clipfrac           | 0.022833332  |
| explained_variance | 0.358        |
| fps                | 344          |
| n_updates          | 27           |
| policy_entropy     | 1.3877547    |
| policy_loss        | -0.003388871 |
| serial_timesteps   | 81000        |
| time_elapsed       | 223          |
| total_timesteps    | 81000        |
| value_loss         | 0.04302539   |
-------------------------------------

--------------------------------------
| approxkl           | 0.0014798981  |
| clipfrac           | 0.013166667   |
| explained_variance | 0.0481        |
| fps                | 344           |
| n_updates          | 43            |
| policy_entropy     | 1.1497393     |
| policy_loss        | -0.0017301907 |
| serial_timesteps   | 129000        |
| time_elapsed       | 363           |
| total_timesteps    | 129000        |
| value_loss         | 0.02692734    |
--------------------------------------
-------------------------------------
| approxkl           | 0.0008785298 |
| clipfrac           | 0.008416667  |
| explained_variance | 0.0111       |
| fps                | 321          |
| n_updates          | 44           |
| policy_entropy     | 1.1298926    |
| policy_loss        | -0.003024294 |
| serial_timesteps   | 132000       |
| time_elapsed       | 372          |
| total_timesteps    | 132000       |
| value_loss         | 0.03565432   |
-------------------------------------

--------------------------------------
| approxkl           | 0.004784811   |
| clipfrac           | 0.05125       |
| explained_variance | 0.242         |
| fps                | 215           |
| n_updates          | 60            |
| policy_entropy     | 1.0237316     |
| policy_loss        | -0.0042640585 |
| serial_timesteps   | 180000        |
| time_elapsed       | 517           |
| total_timesteps    | 180000        |
| value_loss         | 0.017648503   |
--------------------------------------
--------------------------------------
| approxkl           | 0.005281607   |
| clipfrac           | 0.052333333   |
| explained_variance | 0.381         |
| fps                | 269           |
| n_updates          | 61            |
| policy_entropy     | 1.0693977     |
| policy_loss        | -0.0023332685 |
| serial_timesteps   | 183000        |
| time_elapsed       | 531           |
| total_timesteps    | 183000        |
| value_loss         | 0.012213584   |
-------------------------

--------------------------------------
| approxkl           | 0.0040549506  |
| clipfrac           | 0.045333333   |
| explained_variance | 0.219         |
| fps                | 350           |
| n_updates          | 77            |
| policy_entropy     | 0.99584377    |
| policy_loss        | -0.0042569973 |
| serial_timesteps   | 231000        |
| time_elapsed       | 673           |
| total_timesteps    | 231000        |
| value_loss         | 0.017542424   |
--------------------------------------
--------------------------------------
| approxkl           | 0.005653779   |
| clipfrac           | 0.041083336   |
| explained_variance | 0.306         |
| fps                | 351           |
| n_updates          | 78            |
| policy_entropy     | 1.029001      |
| policy_loss        | -0.0056854654 |
| serial_timesteps   | 234000        |
| time_elapsed       | 681           |
| total_timesteps    | 234000        |
| value_loss         | 0.012883215   |
-------------------------

-------------------------------------
| approxkl           | 0.010625285  |
| clipfrac           | 0.06583333   |
| explained_variance | 0.222        |
| fps                | 336          |
| n_updates          | 94           |
| policy_entropy     | 0.9084908    |
| policy_loss        | -0.003654405 |
| serial_timesteps   | 282000       |
| time_elapsed       | 829          |
| total_timesteps    | 282000       |
| value_loss         | 0.021563316  |
-------------------------------------
--------------------------------------
| approxkl           | 0.005731274   |
| clipfrac           | 0.05558333    |
| explained_variance | 0.188         |
| fps                | 344           |
| n_updates          | 95            |
| policy_entropy     | 0.88454396    |
| policy_loss        | -0.0038123345 |
| serial_timesteps   | 285000        |
| time_elapsed       | 837           |
| total_timesteps    | 285000        |
| value_loss         | 0.024749778   |
--------------------------------------

-------------------------------------
| approxkl           | 0.008759721  |
| clipfrac           | 0.13191667   |
| explained_variance | 0.152        |
| fps                | 342          |
| n_updates          | 111          |
| policy_entropy     | 0.8056447    |
| policy_loss        | -0.005153966 |
| serial_timesteps   | 333000       |
| time_elapsed       | 981          |
| total_timesteps    | 333000       |
| value_loss         | 0.013499532  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0036620572 |
| clipfrac           | 0.03575      |
| explained_variance | 0.4          |
| fps                | 309          |
| n_updates          | 112          |
| policy_entropy     | 0.6995164    |
| policy_loss        | -0.007818995 |
| serial_timesteps   | 336000       |
| time_elapsed       | 990          |
| total_timesteps    | 336000       |
| value_loss         | 0.010386813  |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.014475971   |
| clipfrac           | 0.041833334   |
| explained_variance | 0.0479        |
| fps                | 339           |
| n_updates          | 128           |
| policy_entropy     | 0.59008694    |
| policy_loss        | -0.0051574074 |
| serial_timesteps   | 384000        |
| time_elapsed       | 1.13e+03      |
| total_timesteps    | 384000        |
| value_loss         | 0.018227573   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0048900116  |
| clipfrac           | 0.034666665   |
| explained_variance | 0.287         |
| fps                | 332           |
| n_updates          | 129           |
| policy_entropy     | 0.6139062     |
| policy_loss        | -0.0019575704 |
| serial_timesteps   | 387000        |
| time_elapsed       | 1.14e+03      |
| total_timesteps    | 387000        |
| value_loss         | 0.00979407    |
-------------------------

-------------------------------------
| approxkl           | 0.004980321  |
| clipfrac           | 0.05125      |
| explained_variance | 0.368        |
| fps                | 348          |
| n_updates          | 145          |
| policy_entropy     | 0.53575677   |
| policy_loss        | -0.008707399 |
| serial_timesteps   | 435000       |
| time_elapsed       | 1.29e+03     |
| total_timesteps    | 435000       |
| value_loss         | 0.0062253186 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0037461251  |
| clipfrac           | 0.040999997   |
| explained_variance | 0.32          |
| fps                | 332           |
| n_updates          | 146           |
| policy_entropy     | 0.52641255    |
| policy_loss        | -0.0038209977 |
| serial_timesteps   | 438000        |
| time_elapsed       | 1.3e+03       |
| total_timesteps    | 438000        |
| value_loss         | 0.022468202   |
--------------------------------------

--------------------------------------
| approxkl           | 0.008816806   |
| clipfrac           | 0.08975       |
| explained_variance | 0.334         |
| fps                | 326           |
| n_updates          | 162           |
| policy_entropy     | 0.5264768     |
| policy_loss        | 0.00036364287 |
| serial_timesteps   | 486000        |
| time_elapsed       | 1.45e+03      |
| total_timesteps    | 486000        |
| value_loss         | 0.01172403    |
--------------------------------------
---------------------------------------
| approxkl           | 0.0035345135   |
| clipfrac           | 0.04708333     |
| explained_variance | 0.357          |
| fps                | 328            |
| n_updates          | 163            |
| policy_entropy     | 0.4905246      |
| policy_loss        | -0.00047934783 |
| serial_timesteps   | 489000         |
| time_elapsed       | 1.46e+03       |
| total_timesteps    | 489000         |
| value_loss         | 0.0074021574   |
-------------

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  19 / 20  games
Tie  1 / 20  games
Lose  0 / 20  games
1 epoch training time: 1577.895031929016
Training models/lesson2b




---------------------------------------
| approxkl           | 1.0014329e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.118         |
| fps                | 325            |
| n_updates          | 1              |
| policy_entropy     | 1.7917004      |
| policy_loss        | -0.00021792372 |
| serial_timesteps   | 3000           |
| time_elapsed       | 1.91e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.19924146     |
---------------------------------------
-------------------------------------
| approxkl           | 4.867328e-05 |
| clipfrac           | 0.0          |
| explained_variance | -0.0351      |
| fps                | 325          |
| n_updates          | 2            |
| policy_entropy     | 1.7914774    |
| policy_loss        | -0.000758384 |
| serial_timesteps   | 6000         |
| time_elapsed       | 9.21         |
| total_timesteps    | 6000         |
| value_loss         | 0.11627059   |
------------------------

--------------------------------------
| approxkl           | 0.00052559236 |
| clipfrac           | 0.0           |
| explained_variance | 0.24          |
| fps                | 329           |
| n_updates          | 14            |
| policy_entropy     | 1.565675      |
| policy_loss        | -0.0009267301 |
| serial_timesteps   | 42000         |
| time_elapsed       | 119           |
| total_timesteps    | 42000         |
| value_loss         | 0.032161873   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0028867435  |
| clipfrac           | 0.018916667   |
| explained_variance | 0.254         |
| fps                | 329           |
| n_updates          | 15            |
| policy_entropy     | 1.53081       |
| policy_loss        | -0.0032550392 |
| serial_timesteps   | 45000         |
| time_elapsed       | 128           |
| total_timesteps    | 45000         |
| value_loss         | 0.023339689   |
-------------------------

--------------------------------------
| approxkl           | 0.00040934765 |
| clipfrac           | 0.002         |
| explained_variance | 0.103         |
| fps                | 312           |
| n_updates          | 31            |
| policy_entropy     | 1.4395907     |
| policy_loss        | -0.0014731592 |
| serial_timesteps   | 93000         |
| time_elapsed       | 273           |
| total_timesteps    | 93000         |
| value_loss         | 0.034598652   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015788255  |
| clipfrac           | 0.005916667   |
| explained_variance | 0.166         |
| fps                | 329           |
| n_updates          | 32            |
| policy_entropy     | 1.425487      |
| policy_loss        | -0.0016225158 |
| serial_timesteps   | 96000         |
| time_elapsed       | 283           |
| total_timesteps    | 96000         |
| value_loss         | 0.024730945   |
-------------------------

--------------------------------------
| approxkl           | 0.0046601435  |
| clipfrac           | 0.062416665   |
| explained_variance | 0.307         |
| fps                | 358           |
| n_updates          | 48            |
| policy_entropy     | 1.2924448     |
| policy_loss        | -0.0032472806 |
| serial_timesteps   | 144000        |
| time_elapsed       | 419           |
| total_timesteps    | 144000        |
| value_loss         | 0.014646375   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0057279053 |
| clipfrac           | 0.056916665  |
| explained_variance | 0.264        |
| fps                | 357          |
| n_updates          | 49           |
| policy_entropy     | 1.2520981    |
| policy_loss        | -0.00321769  |
| serial_timesteps   | 147000       |
| time_elapsed       | 427          |
| total_timesteps    | 147000       |
| value_loss         | 0.023976129  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0037154632  |
| clipfrac           | 0.05475       |
| explained_variance | 0.344         |
| fps                | 362           |
| n_updates          | 65            |
| policy_entropy     | 1.0438566     |
| policy_loss        | -0.0030032024 |
| serial_timesteps   | 195000        |
| time_elapsed       | 561           |
| total_timesteps    | 195000        |
| value_loss         | 0.022842819   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0039847647  |
| clipfrac           | 0.0435        |
| explained_variance | 0.306         |
| fps                | 360           |
| n_updates          | 66            |
| policy_entropy     | 1.1052622     |
| policy_loss        | -0.0033707079 |
| serial_timesteps   | 198000        |
| time_elapsed       | 569           |
| total_timesteps    | 198000        |
| value_loss         | 0.02603639    |
-------------------------

-------------------------------------
| approxkl           | 0.011448168  |
| clipfrac           | 0.21775001   |
| explained_variance | 0.322        |
| fps                | 359          |
| n_updates          | 82           |
| policy_entropy     | 1.0272967    |
| policy_loss        | -0.004601492 |
| serial_timesteps   | 246000       |
| time_elapsed       | 702          |
| total_timesteps    | 246000       |
| value_loss         | 0.01987541   |
-------------------------------------
-------------------------------------
| approxkl           | 0.008746338  |
| clipfrac           | 0.14316666   |
| explained_variance | 0.394        |
| fps                | 368          |
| n_updates          | 83           |
| policy_entropy     | 1.0126356    |
| policy_loss        | -0.005310048 |
| serial_timesteps   | 249000       |
| time_elapsed       | 710          |
| total_timesteps    | 249000       |
| value_loss         | 0.021667477  |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.0035272827 |
| clipfrac           | 0.018583333  |
| explained_variance | 0.561        |
| fps                | 360          |
| n_updates          | 99           |
| policy_entropy     | 0.94583386   |
| policy_loss        | -0.003401558 |
| serial_timesteps   | 297000       |
| time_elapsed       | 843          |
| total_timesteps    | 297000       |
| value_loss         | 0.013647604  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0022902796  |
| clipfrac           | 0.02025       |
| explained_variance | 0.377         |
| fps                | 368           |
| n_updates          | 100           |
| policy_entropy     | 0.9247529     |
| policy_loss        | -0.0027117976 |
| serial_timesteps   | 300000        |
| time_elapsed       | 851           |
| total_timesteps    | 300000        |
| value_loss         | 0.02486852    |
--------------------------------------

--------------------------------------
| approxkl           | 0.0032228334  |
| clipfrac           | 0.038666666   |
| explained_variance | 0.274         |
| fps                | 356           |
| n_updates          | 116           |
| policy_entropy     | 0.69312656    |
| policy_loss        | -0.0022996329 |
| serial_timesteps   | 348000        |
| time_elapsed       | 984           |
| total_timesteps    | 348000        |
| value_loss         | 0.021326693   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0029320181 |
| clipfrac           | 0.038916666  |
| explained_variance | 0.451        |
| fps                | 365          |
| n_updates          | 117          |
| policy_entropy     | 0.68869925   |
| policy_loss        | -0.001995027 |
| serial_timesteps   | 351000       |
| time_elapsed       | 993          |
| total_timesteps    | 351000       |
| value_loss         | 0.027000016  |
-------------------------------------

--------------------------------------
| approxkl           | 0.008216439   |
| clipfrac           | 0.097833335   |
| explained_variance | 0.311         |
| fps                | 358           |
| n_updates          | 133           |
| policy_entropy     | 0.61158       |
| policy_loss        | -0.0058306055 |
| serial_timesteps   | 399000        |
| time_elapsed       | 1.13e+03      |
| total_timesteps    | 399000        |
| value_loss         | 0.0147708785  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0040336954  |
| clipfrac           | 0.054416664   |
| explained_variance | 0.55          |
| fps                | 363           |
| n_updates          | 134           |
| policy_entropy     | 0.6096805     |
| policy_loss        | -0.0017071363 |
| serial_timesteps   | 402000        |
| time_elapsed       | 1.13e+03      |
| total_timesteps    | 402000        |
| value_loss         | 0.0075946962  |
-------------------------

-------------------------------------
| approxkl           | 0.004306458  |
| clipfrac           | 0.046166666  |
| explained_variance | 0.175        |
| fps                | 358          |
| n_updates          | 150          |
| policy_entropy     | 0.59521055   |
| policy_loss        | -0.004830239 |
| serial_timesteps   | 450000       |
| time_elapsed       | 1.27e+03     |
| total_timesteps    | 450000       |
| value_loss         | 0.013743226  |
-------------------------------------
--------------------------------------
| approxkl           | 0.006304328   |
| clipfrac           | 0.065916665   |
| explained_variance | 0.262         |
| fps                | 365           |
| n_updates          | 151           |
| policy_entropy     | 0.6031685     |
| policy_loss        | -0.0051619285 |
| serial_timesteps   | 453000        |
| time_elapsed       | 1.28e+03      |
| total_timesteps    | 453000        |
| value_loss         | 0.012682265   |
--------------------------------------

Loading a model without an environment, this model cannot be trained until it has a valid environment.


Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  20 / 20  games
Tie  0 / 20  games
Lose  0 / 20  games
1 epoch training time: 1476.266128063202
Training models/lesson2c


---------------------------------------
| approxkl           | 1.2934503e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.37          |
| fps                | 354            |
| n_updates          | 1              |
| policy_entropy     | 1.7917366      |
| policy_loss        | -0.00044577772 |
| serial_timesteps   | 3000           |
| time_elapsed       | 1.91e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.21721324     |
---------------------------------------
-------------------------------------
| approxkl           | 0.0001334752 |
| clipfrac           | 0.0          |
| explained_variance | -0.469       |
| fps                | 361          |
| n_updates          | 2            |
| policy_entropy     | 1.7914238    |
| policy_loss        | -0.001010303 |
| serial_timesteps   | 6000         |
| time_elapsed       | 8.48         |
| total_timesteps    | 6000         |
| value_loss         | 0.014533407  |
------------------------

-------------------------------------
| approxkl           | 0.0021482632 |
| clipfrac           | 0.0          |
| explained_variance | 0.0497       |
| fps                | 367          |
| n_updates          | 3            |
| policy_entropy     | 1.7859991    |
| policy_loss        | -0.004738126 |
| serial_timesteps   | 9000         |
| time_elapsed       | 16.8         |
| total_timesteps    | 9000         |
| value_loss         | 0.013614919  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0062107234  |
| clipfrac           | 0.0475        |
| explained_variance | -0.0572       |
| fps                | 363           |
| n_updates          | 4             |
| policy_entropy     | 1.7493943     |
| policy_loss        | -0.0043433225 |
| serial_timesteps   | 12000         |
| time_elapsed       | 24.9          |
| total_timesteps    | 12000         |
| value_loss         | 0.026419519   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0015352503  |
| clipfrac           | 0.011666667   |
| explained_variance | 0.158         |
| fps                | 377           |
| n_updates          | 20            |
| policy_entropy     | 1.5903583     |
| policy_loss        | -0.0016786808 |
| serial_timesteps   | 60000         |
| time_elapsed       | 155           |
| total_timesteps    | 60000         |
| value_loss         | 0.027747475   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0030131652  |
| clipfrac           | 0.0019999999  |
| explained_variance | 0.121         |
| fps                | 371           |
| n_updates          | 21            |
| policy_entropy     | 1.5839115     |
| policy_loss        | -0.0027849192 |
| serial_timesteps   | 63000         |
| time_elapsed       | 163           |
| total_timesteps    | 63000         |
| value_loss         | 0.01609888    |
-------------------------

--------------------------------------
| approxkl           | 0.004042567   |
| clipfrac           | 0.023583334   |
| explained_variance | 0.0478        |
| fps                | 372           |
| n_updates          | 37            |
| policy_entropy     | 1.4976656     |
| policy_loss        | -0.0022532917 |
| serial_timesteps   | 111000        |
| time_elapsed       | 293           |
| total_timesteps    | 111000        |
| value_loss         | 0.014277662   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0057924744  |
| clipfrac           | 0.081583336   |
| explained_variance | -0.0238       |
| fps                | 376           |
| n_updates          | 38            |
| policy_entropy     | 1.415947      |
| policy_loss        | -0.0016179325 |
| serial_timesteps   | 114000        |
| time_elapsed       | 301           |
| total_timesteps    | 114000        |
| value_loss         | 0.017326664   |
-------------------------

--------------------------------------
| approxkl           | 0.00045189454 |
| clipfrac           | 0.0           |
| explained_variance | 0.0372        |
| fps                | 372           |
| n_updates          | 54            |
| policy_entropy     | 1.0806444     |
| policy_loss        | 8.859412e-05  |
| serial_timesteps   | 162000        |
| time_elapsed       | 429           |
| total_timesteps    | 162000        |
| value_loss         | 0.011032039   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00080600125 |
| clipfrac           | 0.00425       |
| explained_variance | -0.0289       |
| fps                | 378           |
| n_updates          | 55            |
| policy_entropy     | 1.121325      |
| policy_loss        | -0.0007448372 |
| serial_timesteps   | 165000        |
| time_elapsed       | 437           |
| total_timesteps    | 165000        |
| value_loss         | 0.02850175    |
-------------------------

--------------------------------------
| approxkl           | 0.0020370826  |
| clipfrac           | 0.044916667   |
| explained_variance | 0.103         |
| fps                | 372           |
| n_updates          | 71            |
| policy_entropy     | 0.7199994     |
| policy_loss        | -0.0016201417 |
| serial_timesteps   | 213000        |
| time_elapsed       | 565           |
| total_timesteps    | 213000        |
| value_loss         | 0.011603964   |
--------------------------------------
---------------------------------------
| approxkl           | 0.002362158    |
| clipfrac           | 0.05716667     |
| explained_variance | 0.136          |
| fps                | 379            |
| n_updates          | 72             |
| policy_entropy     | 0.787044       |
| policy_loss        | -0.00040689955 |
| serial_timesteps   | 216000         |
| time_elapsed       | 573            |
| total_timesteps    | 216000         |
| value_loss         | 0.008952879    |
-------------

--------------------------------------
| approxkl           | 0.0014386093  |
| clipfrac           | 0.0           |
| explained_variance | 0.00684       |
| fps                | 372           |
| n_updates          | 88            |
| policy_entropy     | 0.88895553    |
| policy_loss        | -0.0006010562 |
| serial_timesteps   | 264000        |
| time_elapsed       | 701           |
| total_timesteps    | 264000        |
| value_loss         | 0.016766742   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0025319974  |
| clipfrac           | 0.052333333   |
| explained_variance | -0.0127       |
| fps                | 375           |
| n_updates          | 89            |
| policy_entropy     | 0.97180367    |
| policy_loss        | -0.0018627904 |
| serial_timesteps   | 267000        |
| time_elapsed       | 709           |
| total_timesteps    | 267000        |
| value_loss         | 0.013951199   |
-------------------------

--------------------------------------
| approxkl           | 0.00089794036 |
| clipfrac           | 0.008833333   |
| explained_variance | 0.0189        |
| fps                | 371           |
| n_updates          | 105           |
| policy_entropy     | 1.026695      |
| policy_loss        | -0.0007354852 |
| serial_timesteps   | 315000        |
| time_elapsed       | 839           |
| total_timesteps    | 315000        |
| value_loss         | 0.0063567767  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0048645064  |
| clipfrac           | 0.061583336   |
| explained_variance | 0.103         |
| fps                | 369           |
| n_updates          | 106           |
| policy_entropy     | 0.94041723    |
| policy_loss        | -0.0025024135 |
| serial_timesteps   | 318000        |
| time_elapsed       | 847           |
| total_timesteps    | 318000        |
| value_loss         | 0.008926782   |
-------------------------

-------------------------------------
| approxkl           | 0.003140041  |
| clipfrac           | 0.055083334  |
| explained_variance | 0.00242      |
| fps                | 372          |
| n_updates          | 122          |
| policy_entropy     | 0.88248026   |
| policy_loss        | -0.002344762 |
| serial_timesteps   | 366000       |
| time_elapsed       | 975          |
| total_timesteps    | 366000       |
| value_loss         | 0.008191978  |
-------------------------------------
---------------------------------------
| approxkl           | 0.00036299624  |
| clipfrac           | 0.00024999998  |
| explained_variance | 0.0333         |
| fps                | 378            |
| n_updates          | 123            |
| policy_entropy     | 0.8533139      |
| policy_loss        | -0.00016690297 |
| serial_timesteps   | 369000         |
| time_elapsed       | 983            |
| total_timesteps    | 369000         |
| value_loss         | 0.0108595975   |
--------------------------

--------------------------------------
| approxkl           | 0.0006548769  |
| clipfrac           | 0.0029166667  |
| explained_variance | 0.00797       |
| fps                | 373           |
| n_updates          | 139           |
| policy_entropy     | 1.1462885     |
| policy_loss        | -0.0005218669 |
| serial_timesteps   | 417000        |
| time_elapsed       | 1.11e+03      |
| total_timesteps    | 417000        |
| value_loss         | 0.0075956066  |
--------------------------------------
--------------------------------------
| approxkl           | 0.003198471   |
| clipfrac           | 0.0           |
| explained_variance | 0.00756       |
| fps                | 369           |
| n_updates          | 140           |
| policy_entropy     | 1.1265076     |
| policy_loss        | -0.0031831237 |
| serial_timesteps   | 420000        |
| time_elapsed       | 1.12e+03      |
| total_timesteps    | 420000        |
| value_loss         | 0.0052942596  |
-------------------------

--------------------------------------
| approxkl           | 0.0016059098  |
| clipfrac           | 0.024916666   |
| explained_variance | 0.0389        |
| fps                | 371           |
| n_updates          | 156           |
| policy_entropy     | 1.0543594     |
| policy_loss        | -0.0011162938 |
| serial_timesteps   | 468000        |
| time_elapsed       | 1.25e+03      |
| total_timesteps    | 468000        |
| value_loss         | 0.009959066   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0005869728  |
| clipfrac           | 0.0           |
| explained_variance | -0.014        |
| fps                | 378           |
| n_updates          | 157           |
| policy_entropy     | 1.0374231     |
| policy_loss        | -4.034211e-05 |
| serial_timesteps   | 471000        |
| time_elapsed       | 1.25e+03      |
| total_timesteps    | 471000        |
| value_loss         | 0.00968781    |
-------------------------



Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 1970.026197195053
Training models/lesson2c


Loading a model without an environment, this model cannot be trained until it has a valid environment.




---------------------------------------
| approxkl           | 1.28054635e-05 |
| clipfrac           | 0.0            |
| explained_variance | -4.21          |
| fps                | 351            |
| n_updates          | 1              |
| policy_entropy     | 1.7916825      |
| policy_loss        | -0.00027095425 |
| serial_timesteps   | 3000           |
| time_elapsed       | 2.38e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.23029037     |
---------------------------------------
---------------------------------------
| approxkl           | 3.717184e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.107         |
| fps                | 363            |
| n_updates          | 2              |
| policy_entropy     | 1.7914784      |
| policy_loss        | -0.00026938293 |
| serial_timesteps   | 6000           |
| time_elapsed       | 8.54           |
| total_timesteps    | 6000           |
| value_loss         | 0.037325494    |


--------------------------------------
| approxkl           | 0.00069759495 |
| clipfrac           | 0.0           |
| explained_variance | 0.288         |
| fps                | 368           |
| n_updates          | 18            |
| policy_entropy     | 1.5431947     |
| policy_loss        | -0.0008903565 |
| serial_timesteps   | 54000         |
| time_elapsed       | 139           |
| total_timesteps    | 54000         |
| value_loss         | 0.021130405   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0016514473 |
| clipfrac           | 0.013916666  |
| explained_variance | 0.356        |
| fps                | 375          |
| n_updates          | 19           |
| policy_entropy     | 1.5462519    |
| policy_loss        | -0.002411047 |
| serial_timesteps   | 57000        |
| time_elapsed       | 147          |
| total_timesteps    | 57000        |
| value_loss         | 0.018897442  |
-------------------------------------

--------------------------------------
| approxkl           | 0.002309564   |
| clipfrac           | 0.01175       |
| explained_variance | 0.0401        |
| fps                | 377           |
| n_updates          | 35            |
| policy_entropy     | 1.5231801     |
| policy_loss        | -0.0015032273 |
| serial_timesteps   | 105000        |
| time_elapsed       | 276           |
| total_timesteps    | 105000        |
| value_loss         | 0.016517138   |
--------------------------------------
--------------------------------------
| approxkl           | 0.005755662   |
| clipfrac           | 0.09833334    |
| explained_variance | -0.0463       |
| fps                | 370           |
| n_updates          | 36            |
| policy_entropy     | 1.4916167     |
| policy_loss        | -0.0023083475 |
| serial_timesteps   | 108000        |
| time_elapsed       | 284           |
| total_timesteps    | 108000        |
| value_loss         | 0.009736985   |
-------------------------

--------------------------------------
| approxkl           | 0.0037609865  |
| clipfrac           | 0.07783333    |
| explained_variance | -0.0468       |
| fps                | 374           |
| n_updates          | 52            |
| policy_entropy     | 1.4573255     |
| policy_loss        | -0.0022825697 |
| serial_timesteps   | 156000        |
| time_elapsed       | 413           |
| total_timesteps    | 156000        |
| value_loss         | 0.006345933   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0013970499  |
| clipfrac           | 0.0           |
| explained_variance | 0.0124        |
| fps                | 370           |
| n_updates          | 53            |
| policy_entropy     | 1.4631902     |
| policy_loss        | -0.0005288605 |
| serial_timesteps   | 159000        |
| time_elapsed       | 421           |
| total_timesteps    | 159000        |
| value_loss         | 0.012365295   |
-------------------------

--------------------------------------
| approxkl           | 0.00074807846 |
| clipfrac           | 8.333333e-05  |
| explained_variance | -0.0175       |
| fps                | 376           |
| n_updates          | 69            |
| policy_entropy     | 1.2698785     |
| policy_loss        | -0.0013534855 |
| serial_timesteps   | 207000        |
| time_elapsed       | 549           |
| total_timesteps    | 207000        |
| value_loss         | 0.007020119   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0025846648   |
| clipfrac           | 0.017749999    |
| explained_variance | 0.0466         |
| fps                | 371            |
| n_updates          | 70             |
| policy_entropy     | 1.3192815      |
| policy_loss        | 0.000112852365 |
| serial_timesteps   | 210000         |
| time_elapsed       | 557            |
| total_timesteps    | 210000         |
| value_loss         | 0.009264847    |
-------------

--------------------------------------
| approxkl           | 0.0010620891  |
| clipfrac           | 0.0           |
| explained_variance | -0.023        |
| fps                | 379           |
| n_updates          | 86            |
| policy_entropy     | 1.0669943     |
| policy_loss        | -0.0004343564 |
| serial_timesteps   | 258000        |
| time_elapsed       | 686           |
| total_timesteps    | 258000        |
| value_loss         | 0.010992898   |
--------------------------------------
--------------------------------------
| approxkl           | 0.007440503   |
| clipfrac           | 0.09625       |
| explained_variance | 0.000136      |
| fps                | 371           |
| n_updates          | 87            |
| policy_entropy     | 0.9536732     |
| policy_loss        | -0.0028797258 |
| serial_timesteps   | 261000        |
| time_elapsed       | 694           |
| total_timesteps    | 261000        |
| value_loss         | 0.00879498    |
-------------------------

--------------------------------------
| approxkl           | 0.0033720138  |
| clipfrac           | 0.0037500001  |
| explained_variance | 0.0544        |
| fps                | 376           |
| n_updates          | 103           |
| policy_entropy     | 1.1110101     |
| policy_loss        | -0.0013529048 |
| serial_timesteps   | 309000        |
| time_elapsed       | 822           |
| total_timesteps    | 309000        |
| value_loss         | 0.008992717   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0006502131   |
| clipfrac           | 0.004583333    |
| explained_variance | -0.119         |
| fps                | 372            |
| n_updates          | 104            |
| policy_entropy     | 1.0492358      |
| policy_loss        | -0.00056687545 |
| serial_timesteps   | 312000         |
| time_elapsed       | 830            |
| total_timesteps    | 312000         |
| value_loss         | 0.011716184    |
-------------

--------------------------------------
| approxkl           | 0.0021006376  |
| clipfrac           | 0.0           |
| explained_variance | 0.0856        |
| fps                | 377           |
| n_updates          | 120           |
| policy_entropy     | 1.3728914     |
| policy_loss        | -0.0011990621 |
| serial_timesteps   | 360000        |
| time_elapsed       | 959           |
| total_timesteps    | 360000        |
| value_loss         | 0.011546576   |
--------------------------------------
--------------------------------------
| approxkl           | 0.008315857   |
| clipfrac           | 0.19424999    |
| explained_variance | 0.00304       |
| fps                | 371           |
| n_updates          | 121           |
| policy_entropy     | 1.3239993     |
| policy_loss        | -0.0027564804 |
| serial_timesteps   | 363000        |
| time_elapsed       | 967           |
| total_timesteps    | 363000        |
| value_loss         | 0.01020496    |
-------------------------

-------------------------------------
| approxkl           | 0.0072017196 |
| clipfrac           | 0.087416664  |
| explained_variance | -0.00939     |
| fps                | 378          |
| n_updates          | 137          |
| policy_entropy     | 1.2350215    |
| policy_loss        | -0.003228331 |
| serial_timesteps   | 411000       |
| time_elapsed       | 1.09e+03     |
| total_timesteps    | 411000       |
| value_loss         | 0.009024601  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0012032153  |
| clipfrac           | 0.001         |
| explained_variance | 0.0118        |
| fps                | 372           |
| n_updates          | 138           |
| policy_entropy     | 1.1996186     |
| policy_loss        | -0.0013400076 |
| serial_timesteps   | 414000        |
| time_elapsed       | 1.1e+03       |
| total_timesteps    | 414000        |
| value_loss         | 0.008711029   |
--------------------------------------

---------------------------------------
| approxkl           | 0.0007185945   |
| clipfrac           | 0.009416667    |
| explained_variance | -0.0312        |
| fps                | 379            |
| n_updates          | 154            |
| policy_entropy     | 0.9578277      |
| policy_loss        | -0.00045080174 |
| serial_timesteps   | 462000         |
| time_elapsed       | 1.23e+03       |
| total_timesteps    | 462000         |
| value_loss         | 0.009828377    |
---------------------------------------
---------------------------------------
| approxkl           | 0.000857114    |
| clipfrac           | 0.004416667    |
| explained_variance | 0.0211         |
| fps                | 373            |
| n_updates          | 155            |
| policy_entropy     | 1.0042768      |
| policy_loss        | -0.00078739715 |
| serial_timesteps   | 465000         |
| time_elapsed       | 1.24e+03       |
| total_timesteps    | 465000         |
| value_loss         | 0.007658659    |




Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 1972.2835915088654
Training models/lesson2c


Loading a model without an environment, this model cannot be trained until it has a valid environment.




---------------------------------------
| approxkl           | 5.226387e-06   |
| clipfrac           | 0.0            |
| explained_variance | -0.311         |
| fps                | 347            |
| n_updates          | 1              |
| policy_entropy     | 1.7917423      |
| policy_loss        | -0.00012697646 |
| serial_timesteps   | 3000           |
| time_elapsed       | 1.91e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.2236678      |
---------------------------------------
--------------------------------------
| approxkl           | 2.3101487e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.26         |
| fps                | 363           |
| n_updates          | 2             |
| policy_entropy     | 1.7916778     |
| policy_loss        | -0.0004017662 |
| serial_timesteps   | 6000          |
| time_elapsed       | 8.65          |
| total_timesteps    | 6000          |
| value_loss         | 0.072917074   |
------------

--------------------------------------
| approxkl           | 0.0057818945  |
| clipfrac           | 0.099249996   |
| explained_variance | 0.173         |
| fps                | 373           |
| n_updates          | 16            |
| policy_entropy     | 1.5315692     |
| policy_loss        | -0.0041216426 |
| serial_timesteps   | 48000         |
| time_elapsed       | 123           |
| total_timesteps    | 48000         |
| value_loss         | 0.010516164   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0047876337 |
| clipfrac           | 0.116500005  |
| explained_variance | 0.391        |
| fps                | 369          |
| n_updates          | 17           |
| policy_entropy     | 1.4934766    |
| policy_loss        | -0.004366417 |
| serial_timesteps   | 51000        |
| time_elapsed       | 131          |
| total_timesteps    | 51000        |
| value_loss         | 0.0055094236 |
-------------------------------------

-------------------------------------
| approxkl           | 0.0010149394 |
| clipfrac           | 0.0          |
| explained_variance | -0.045       |
| fps                | 376          |
| n_updates          | 33           |
| policy_entropy     | 1.1078568    |
| policy_loss        | -0.001100664 |
| serial_timesteps   | 99000        |
| time_elapsed       | 260          |
| total_timesteps    | 99000        |
| value_loss         | 0.014522618  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0037362482  |
| clipfrac           | 0.04175       |
| explained_variance | 0.0456        |
| fps                | 371           |
| n_updates          | 34            |
| policy_entropy     | 1.0736881     |
| policy_loss        | -0.0015752108 |
| serial_timesteps   | 102000        |
| time_elapsed       | 268           |
| total_timesteps    | 102000        |
| value_loss         | 0.011787867   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0021210671  |
| clipfrac           | 0.0355        |
| explained_variance | -0.0116       |
| fps                | 379           |
| n_updates          | 50            |
| policy_entropy     | 1.0978879     |
| policy_loss        | -0.0015220905 |
| serial_timesteps   | 150000        |
| time_elapsed       | 397           |
| total_timesteps    | 150000        |
| value_loss         | 0.0103897685  |
--------------------------------------
---------------------------------------
| approxkl           | 0.003481397    |
| clipfrac           | 0.007583333    |
| explained_variance | 0.00438        |
| fps                | 371            |
| n_updates          | 51             |
| policy_entropy     | 1.0283216      |
| policy_loss        | -0.00077620364 |
| serial_timesteps   | 153000         |
| time_elapsed       | 405            |
| total_timesteps    | 153000         |
| value_loss         | 0.008302519    |
-------------

---------------------------------------
| approxkl           | 0.000641299    |
| clipfrac           | 0.009          |
| explained_variance | -0.00746       |
| fps                | 378            |
| n_updates          | 67             |
| policy_entropy     | 0.8469434      |
| policy_loss        | -1.4478515e-05 |
| serial_timesteps   | 201000         |
| time_elapsed       | 533            |
| total_timesteps    | 201000         |
| value_loss         | 0.0094978465   |
---------------------------------------
--------------------------------------
| approxkl           | 0.000922576   |
| clipfrac           | 0.011083333   |
| explained_variance | 0.0259        |
| fps                | 372           |
| n_updates          | 68            |
| policy_entropy     | 0.9304913     |
| policy_loss        | -0.0007795885 |
| serial_timesteps   | 204000        |
| time_elapsed       | 541           |
| total_timesteps    | 204000        |
| value_loss         | 0.00979192    |
------------

--------------------------------------
| approxkl           | 0.0017865016  |
| clipfrac           | 0.045083333   |
| explained_variance | -0.0492       |
| fps                | 378           |
| n_updates          | 84            |
| policy_entropy     | 0.8457678     |
| policy_loss        | -0.0007336559 |
| serial_timesteps   | 252000        |
| time_elapsed       | 669           |
| total_timesteps    | 252000        |
| value_loss         | 0.008373798   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0015790678 |
| clipfrac           | 0.00075      |
| explained_variance | -0.0228      |
| fps                | 371          |
| n_updates          | 85           |
| policy_entropy     | 0.83434576   |
| policy_loss        | -0.000772778 |
| serial_timesteps   | 255000       |
| time_elapsed       | 677          |
| total_timesteps    | 255000       |
| value_loss         | 0.008204112  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0018274611  |
| clipfrac           | 0.021166667   |
| explained_variance | 0.0566        |
| fps                | 379           |
| n_updates          | 101           |
| policy_entropy     | 0.9992426     |
| policy_loss        | -0.0018010612 |
| serial_timesteps   | 303000        |
| time_elapsed       | 805           |
| total_timesteps    | 303000        |
| value_loss         | 0.011221824   |
--------------------------------------
---------------------------------------
| approxkl           | 0.003066298    |
| clipfrac           | 0.00775        |
| explained_variance | -0.0536        |
| fps                | 371            |
| n_updates          | 102            |
| policy_entropy     | 1.0266031      |
| policy_loss        | -0.00044725346 |
| serial_timesteps   | 306000         |
| time_elapsed       | 813            |
| total_timesteps    | 306000         |
| value_loss         | 0.010266855    |
-------------

--------------------------------------
| approxkl           | 0.0036347234  |
| clipfrac           | 0.06566667    |
| explained_variance | 0.0129        |
| fps                | 379           |
| n_updates          | 118           |
| policy_entropy     | 0.8669843     |
| policy_loss        | -0.0020042688 |
| serial_timesteps   | 354000        |
| time_elapsed       | 941           |
| total_timesteps    | 354000        |
| value_loss         | 0.016192604   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0022788492  |
| clipfrac           | 0.019083332   |
| explained_variance | 0.0261        |
| fps                | 370           |
| n_updates          | 119           |
| policy_entropy     | 0.9221426     |
| policy_loss        | -0.0015772241 |
| serial_timesteps   | 357000        |
| time_elapsed       | 949           |
| total_timesteps    | 357000        |
| value_loss         | 0.008997273   |
-------------------------

---------------------------------------
| approxkl           | 0.0003270804   |
| clipfrac           | 0.0014166667   |
| explained_variance | 0.0879         |
| fps                | 356            |
| n_updates          | 135            |
| policy_entropy     | 0.57654214     |
| policy_loss        | -4.8874004e-05 |
| serial_timesteps   | 405000         |
| time_elapsed       | 1.08e+03       |
| total_timesteps    | 405000         |
| value_loss         | 0.010781012    |
---------------------------------------
--------------------------------------
| approxkl           | 0.001750407   |
| clipfrac           | 0.022416666   |
| explained_variance | 0.122         |
| fps                | 366           |
| n_updates          | 136           |
| policy_entropy     | 0.58734936    |
| policy_loss        | -0.0011920347 |
| serial_timesteps   | 408000        |
| time_elapsed       | 1.09e+03      |
| total_timesteps    | 408000        |
| value_loss         | 0.009424265   |
------------

---------------------------------------
| approxkl           | 0.0011927041   |
| clipfrac           | 0.01275        |
| explained_variance | -0.0498        |
| fps                | 378            |
| n_updates          | 152            |
| policy_entropy     | 0.77117175     |
| policy_loss        | -0.00027060823 |
| serial_timesteps   | 456000         |
| time_elapsed       | 1.22e+03       |
| total_timesteps    | 456000         |
| value_loss         | 0.005949801    |
---------------------------------------
--------------------------------------
| approxkl           | 0.00047827617 |
| clipfrac           | 0.00225       |
| explained_variance | -0.0391       |
| fps                | 371           |
| n_updates          | 153           |
| policy_entropy     | 0.8199035     |
| policy_loss        | -0.000701079  |
| serial_timesteps   | 459000        |
| time_elapsed       | 1.22e+03      |
| total_timesteps    | 459000        |
| value_loss         | 0.0068079075  |
------------



Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 1971.786649465561
Training models/lesson2c


Loading a model without an environment, this model cannot be trained until it has a valid environment.




---------------------------------------
| approxkl           | 1.6053446e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.201         |
| fps                | 340            |
| n_updates          | 1              |
| policy_entropy     | 1.7917224      |
| policy_loss        | -0.00036958372 |
| serial_timesteps   | 3000           |
| time_elapsed       | 1.91e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.44524089     |
---------------------------------------
---------------------------------------
| approxkl           | 2.010206e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.0954        |
| fps                | 356            |
| n_updates          | 2              |
| policy_entropy     | 1.7916052      |
| policy_loss        | -0.00033946143 |
| serial_timesteps   | 6000           |
| time_elapsed       | 8.81           |
| total_timesteps    | 6000           |
| value_loss         | 0.13190186     |


--------------------------------------
| approxkl           | 0.00069481524 |
| clipfrac           | 0.00175       |
| explained_variance | 0.328         |
| fps                | 372           |
| n_updates          | 14            |
| policy_entropy     | 1.5224675     |
| policy_loss        | -0.0012201478 |
| serial_timesteps   | 42000         |
| time_elapsed       | 107           |
| total_timesteps    | 42000         |
| value_loss         | 0.027075492   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015123282  |
| clipfrac           | 0.0055        |
| explained_variance | 0.206         |
| fps                | 367           |
| n_updates          | 15            |
| policy_entropy     | 1.5013282     |
| policy_loss        | -0.0019227748 |
| serial_timesteps   | 45000         |
| time_elapsed       | 116           |
| total_timesteps    | 45000         |
| value_loss         | 0.02434558    |
-------------------------

--------------------------------------
| approxkl           | 0.003994067   |
| clipfrac           | 0.029083334   |
| explained_variance | 0.13          |
| fps                | 369           |
| n_updates          | 31            |
| policy_entropy     | 1.1426635     |
| policy_loss        | -0.0015738432 |
| serial_timesteps   | 93000         |
| time_elapsed       | 245           |
| total_timesteps    | 93000         |
| value_loss         | 0.0138604855  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0016144405  |
| clipfrac           | 0.012         |
| explained_variance | 0.0748        |
| fps                | 375           |
| n_updates          | 32            |
| policy_entropy     | 1.0727178     |
| policy_loss        | -0.0017119228 |
| serial_timesteps   | 96000         |
| time_elapsed       | 253           |
| total_timesteps    | 96000         |
| value_loss         | 0.009967843   |
-------------------------

--------------------------------------
| approxkl           | 0.0009208644  |
| clipfrac           | 0.0           |
| explained_variance | -0.0336       |
| fps                | 368           |
| n_updates          | 48            |
| policy_entropy     | 1.0413258     |
| policy_loss        | 0.00055861374 |
| serial_timesteps   | 144000        |
| time_elapsed       | 383           |
| total_timesteps    | 144000        |
| value_loss         | 0.009240031   |
--------------------------------------
----------------------------------------
| approxkl           | 0.0008236023    |
| clipfrac           | 0.0011666666    |
| explained_variance | -0.0012         |
| fps                | 373             |
| n_updates          | 49              |
| policy_entropy     | 1.0258832       |
| policy_loss        | -0.000103574595 |
| serial_timesteps   | 147000          |
| time_elapsed       | 391             |
| total_timesteps    | 147000          |
| value_loss         | 0.013509025     |
-

--------------------------------------
| approxkl           | 0.004519261   |
| clipfrac           | 0.066083334   |
| explained_variance | 0.055         |
| fps                | 376           |
| n_updates          | 65            |
| policy_entropy     | 0.927675      |
| policy_loss        | -0.0029533887 |
| serial_timesteps   | 195000        |
| time_elapsed       | 520           |
| total_timesteps    | 195000        |
| value_loss         | 0.016434506   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006287337  |
| clipfrac           | 0.0030833336  |
| explained_variance | 0.00722       |
| fps                | 370           |
| n_updates          | 66            |
| policy_entropy     | 0.9330617     |
| policy_loss        | -0.0001679147 |
| serial_timesteps   | 198000        |
| time_elapsed       | 528           |
| total_timesteps    | 198000        |
| value_loss         | 0.006775612   |
-------------------------

--------------------------------------
| approxkl           | 0.0007452282  |
| clipfrac           | 0.008083333   |
| explained_variance | 0.182         |
| fps                | 376           |
| n_updates          | 82            |
| policy_entropy     | 1.1608285     |
| policy_loss        | -0.0009987715 |
| serial_timesteps   | 246000        |
| time_elapsed       | 657           |
| total_timesteps    | 246000        |
| value_loss         | 0.01167483    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0037880947  |
| clipfrac           | 0.024166666   |
| explained_variance | 0.0146        |
| fps                | 369           |
| n_updates          | 83            |
| policy_entropy     | 1.0976048     |
| policy_loss        | -0.0019924624 |
| serial_timesteps   | 249000        |
| time_elapsed       | 665           |
| total_timesteps    | 249000        |
| value_loss         | 0.012427099   |
-------------------------

--------------------------------------
| approxkl           | 0.0048525105  |
| clipfrac           | 0.0725        |
| explained_variance | 0.0253        |
| fps                | 371           |
| n_updates          | 99            |
| policy_entropy     | 1.2394353     |
| policy_loss        | -0.0015867914 |
| serial_timesteps   | 297000        |
| time_elapsed       | 794           |
| total_timesteps    | 297000        |
| value_loss         | 0.0065732584  |
--------------------------------------
---------------------------------------
| approxkl           | 0.00060943526  |
| clipfrac           | 0.0            |
| explained_variance | -0.0543        |
| fps                | 368            |
| n_updates          | 100            |
| policy_entropy     | 1.2303247      |
| policy_loss        | -0.00042459407 |
| serial_timesteps   | 300000         |
| time_elapsed       | 803            |
| total_timesteps    | 300000         |
| value_loss         | 0.008126933    |
-------------

---------------------------------------
| approxkl           | 0.00032150664  |
| clipfrac           | 0.0            |
| explained_variance | 0.0958         |
| fps                | 371            |
| n_updates          | 116            |
| policy_entropy     | 1.1600811      |
| policy_loss        | -0.00038910282 |
| serial_timesteps   | 348000         |
| time_elapsed       | 932            |
| total_timesteps    | 348000         |
| value_loss         | 0.009155788    |
---------------------------------------
--------------------------------------
| approxkl           | 0.005729368   |
| clipfrac           | 0.08025       |
| explained_variance | 0.051         |
| fps                | 375           |
| n_updates          | 117           |
| policy_entropy     | 1.2356626     |
| policy_loss        | -0.0038237984 |
| serial_timesteps   | 351000        |
| time_elapsed       | 940           |
| total_timesteps    | 351000        |
| value_loss         | 0.008873628   |
------------

--------------------------------------
| approxkl           | 0.0017538231  |
| clipfrac           | 8.333333e-05  |
| explained_variance | 0.0105        |
| fps                | 368           |
| n_updates          | 133           |
| policy_entropy     | 1.2011237     |
| policy_loss        | -7.874827e-05 |
| serial_timesteps   | 399000        |
| time_elapsed       | 1.07e+03      |
| total_timesteps    | 399000        |
| value_loss         | 0.018907037   |
--------------------------------------
-------------------------------------
| approxkl           | 0.001733179  |
| clipfrac           | 0.0          |
| explained_variance | -0.066       |
| fps                | 375          |
| n_updates          | 134          |
| policy_entropy     | 1.1916069    |
| policy_loss        | -0.001396151 |
| serial_timesteps   | 402000       |
| time_elapsed       | 1.08e+03     |
| total_timesteps    | 402000       |
| value_loss         | 0.0063260924 |
-------------------------------------

---------------------------------------
| approxkl           | 0.00030054082  |
| clipfrac           | 0.0            |
| explained_variance | -0.0233        |
| fps                | 368            |
| n_updates          | 150            |
| policy_entropy     | 1.1046215      |
| policy_loss        | -0.00015399075 |
| serial_timesteps   | 450000         |
| time_elapsed       | 1.21e+03       |
| total_timesteps    | 450000         |
| value_loss         | 0.013000082    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0012402174  |
| clipfrac           | 0.0           |
| explained_variance | -0.00118      |
| fps                | 375           |
| n_updates          | 151           |
| policy_entropy     | 1.0697044     |
| policy_loss        | -0.0009258071 |
| serial_timesteps   | 453000        |
| time_elapsed       | 1.21e+03      |
| total_timesteps    | 453000        |
| value_loss         | 0.01203702    |
------------

Loading a model without an environment, this model cannot be trained until it has a valid environment.


Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 1980.0426187515259
Training models/lesson2c


Loading a model without an environment, this model cannot be trained until it has a valid environment.




---------------------------------------
| approxkl           | 7.74187e-06    |
| clipfrac           | 0.0            |
| explained_variance | -0.361         |
| fps                | 347            |
| n_updates          | 1              |
| policy_entropy     | 1.7917444      |
| policy_loss        | -0.00018095868 |
| serial_timesteps   | 3000           |
| time_elapsed       | 1.91e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.196223       |
---------------------------------------
--------------------------------------
| approxkl           | 3.9557777e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.659        |
| fps                | 359           |
| n_updates          | 2             |
| policy_entropy     | 1.7916533     |
| policy_loss        | -0.0008095934 |
| serial_timesteps   | 6000          |
| time_elapsed       | 8.63          |
| total_timesteps    | 6000          |
| value_loss         | 0.0550979     |
------------

--------------------------------------
| approxkl           | 0.001642114   |
| clipfrac           | 0.008083333   |
| explained_variance | 0.297         |
| fps                | 369           |
| n_updates          | 14            |
| policy_entropy     | 1.3362669     |
| policy_loss        | -0.0022925462 |
| serial_timesteps   | 42000         |
| time_elapsed       | 108           |
| total_timesteps    | 42000         |
| value_loss         | 0.02085874    |
--------------------------------------
--------------------------------------
| approxkl           | 0.00211142    |
| clipfrac           | 0.014499999   |
| explained_variance | 0.11          |
| fps                | 359           |
| n_updates          | 15            |
| policy_entropy     | 1.2998667     |
| policy_loss        | -0.0031063803 |
| serial_timesteps   | 45000         |
| time_elapsed       | 116           |
| total_timesteps    | 45000         |
| value_loss         | 0.016464032   |
-------------------------

--------------------------------------
| approxkl           | 0.0005689555  |
| clipfrac           | 0.0           |
| explained_variance | 0.129         |
| fps                | 367           |
| n_updates          | 31            |
| policy_entropy     | 1.2519917     |
| policy_loss        | -0.0005148976 |
| serial_timesteps   | 93000         |
| time_elapsed       | 247           |
| total_timesteps    | 93000         |
| value_loss         | 0.012903245   |
--------------------------------------
--------------------------------------
| approxkl           | 0.001691814   |
| clipfrac           | 0.0035833332  |
| explained_variance | 0.213         |
| fps                | 369           |
| n_updates          | 32            |
| policy_entropy     | 1.3044556     |
| policy_loss        | -0.0008512401 |
| serial_timesteps   | 96000         |
| time_elapsed       | 255           |
| total_timesteps    | 96000         |
| value_loss         | 0.02184519    |
-------------------------

--------------------------------------
| approxkl           | 0.00068073836 |
| clipfrac           | 0.0           |
| explained_variance | 0.0326        |
| fps                | 366           |
| n_updates          | 48            |
| policy_entropy     | 1.400578      |
| policy_loss        | -0.0004711039 |
| serial_timesteps   | 144000        |
| time_elapsed       | 385           |
| total_timesteps    | 144000        |
| value_loss         | 0.009879407   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0017093106  |
| clipfrac           | 0.00024999998 |
| explained_variance | 0.022         |
| fps                | 372           |
| n_updates          | 49            |
| policy_entropy     | 1.4155757     |
| policy_loss        | -0.0016382941 |
| serial_timesteps   | 147000        |
| time_elapsed       | 393           |
| total_timesteps    | 147000        |
| value_loss         | 0.009737786   |
-------------------------

--------------------------------------
| approxkl           | 0.0037163396  |
| clipfrac           | 0.0225        |
| explained_variance | -0.0617       |
| fps                | 371           |
| n_updates          | 65            |
| policy_entropy     | 1.5183117     |
| policy_loss        | -0.0032631245 |
| serial_timesteps   | 195000        |
| time_elapsed       | 523           |
| total_timesteps    | 195000        |
| value_loss         | 0.009816384   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0026871103  |
| clipfrac           | 0.006         |
| explained_variance | -0.0281       |
| fps                | 367           |
| n_updates          | 66            |
| policy_entropy     | 1.5184181     |
| policy_loss        | -0.0014788192 |
| serial_timesteps   | 198000        |
| time_elapsed       | 531           |
| total_timesteps    | 198000        |
| value_loss         | 0.009653511   |
-------------------------

--------------------------------------
| approxkl           | 0.004738771   |
| clipfrac           | 0.06791667    |
| explained_variance | 0.052         |
| fps                | 368           |
| n_updates          | 82            |
| policy_entropy     | 1.183544      |
| policy_loss        | -0.0020088616 |
| serial_timesteps   | 246000        |
| time_elapsed       | 661           |
| total_timesteps    | 246000        |
| value_loss         | 0.004759555   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00038376526 |
| clipfrac           | 0.0           |
| explained_variance | 0.0258        |
| fps                | 374           |
| n_updates          | 83            |
| policy_entropy     | 1.1751372     |
| policy_loss        | 8.065501e-06  |
| serial_timesteps   | 249000        |
| time_elapsed       | 670           |
| total_timesteps    | 249000        |
| value_loss         | 0.007624096   |
-------------------------

---------------------------------------
| approxkl           | 0.00011206369  |
| clipfrac           | 0.0            |
| explained_variance | 0.00541        |
| fps                | 367            |
| n_updates          | 99             |
| policy_entropy     | 0.668849       |
| policy_loss        | -5.1321484e-05 |
| serial_timesteps   | 297000         |
| time_elapsed       | 799            |
| total_timesteps    | 297000         |
| value_loss         | 0.017534828    |
---------------------------------------
--------------------------------------
| approxkl           | 0.00043140157 |
| clipfrac           | 0.0029166667  |
| explained_variance | 0.00231       |
| fps                | 373           |
| n_updates          | 100           |
| policy_entropy     | 0.6234663     |
| policy_loss        | -0.000566798  |
| serial_timesteps   | 300000        |
| time_elapsed       | 807           |
| total_timesteps    | 300000        |
| value_loss         | 0.021150382   |
------------

-------------------------------------
| approxkl           | 0.0010397541 |
| clipfrac           | 0.017083334  |
| explained_variance | 0.00303      |
| fps                | 373          |
| n_updates          | 116          |
| policy_entropy     | 0.6609538    |
| policy_loss        | -0.001048404 |
| serial_timesteps   | 348000       |
| time_elapsed       | 937          |
| total_timesteps    | 348000       |
| value_loss         | 0.009379115  |
-------------------------------------
--------------------------------------
| approxkl           | 0.001004756   |
| clipfrac           | 0.01625       |
| explained_variance | 0.00783       |
| fps                | 368           |
| n_updates          | 117           |
| policy_entropy     | 0.714692      |
| policy_loss        | -0.0005850697 |
| serial_timesteps   | 351000        |
| time_elapsed       | 945           |
| total_timesteps    | 351000        |
| value_loss         | 0.008423712   |
--------------------------------------

---------------------------------------
| approxkl           | 0.0004677458   |
| clipfrac           | 0.0010833334   |
| explained_variance | 0.0518         |
| fps                | 373            |
| n_updates          | 133            |
| policy_entropy     | 0.99292        |
| policy_loss        | -0.00029951037 |
| serial_timesteps   | 399000         |
| time_elapsed       | 1.07e+03       |
| total_timesteps    | 399000         |
| value_loss         | 0.012973616    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0028776377  |
| clipfrac           | 0.016916666   |
| explained_variance | 0.279         |
| fps                | 366           |
| n_updates          | 134           |
| policy_entropy     | 0.95317996    |
| policy_loss        | -0.0020231484 |
| serial_timesteps   | 402000        |
| time_elapsed       | 1.08e+03      |
| total_timesteps    | 402000        |
| value_loss         | 0.0098925475  |
------------

---------------------------------------
| approxkl           | 0.0004481372   |
| clipfrac           | 0.0011666666   |
| explained_variance | 0.0294         |
| fps                | 366            |
| n_updates          | 150            |
| policy_entropy     | 0.6032699      |
| policy_loss        | -0.00042412156 |
| serial_timesteps   | 450000         |
| time_elapsed       | 1.21e+03       |
| total_timesteps    | 450000         |
| value_loss         | 0.009624705    |
---------------------------------------
-------------------------------------
| approxkl           | 0.0023301868 |
| clipfrac           | 0.037916664  |
| explained_variance | 0.0667       |
| fps                | 375          |
| n_updates          | 151          |
| policy_entropy     | 0.70428544   |
| policy_loss        | -0.000802686 |
| serial_timesteps   | 453000       |
| time_elapsed       | 1.22e+03     |
| total_timesteps    | 453000       |
| value_loss         | 0.010017035  |
------------------------

--------------------------------------
| approxkl           | 0.0016260478  |
| clipfrac           | 0.011         |
| explained_variance | 0.0396        |
| fps                | 374           |
| n_updates          | 166           |
| policy_entropy     | 0.5546055     |
| policy_loss        | -0.0009676693 |
| serial_timesteps   | 498000        |
| time_elapsed       | 1.34e+03      |
| total_timesteps    | 498000        |
| value_loss         | 0.008763389   |
--------------------------------------
Loading a model without an environment, this model cannot be trained until it has a valid environment.


Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 1987.549852848053
Training models/lesson2c




Loading a model without an environment, this model cannot be trained until it has a valid environment.


--------------------------------------
| approxkl           | 1.610234e-05  |
| clipfrac           | 0.0           |
| explained_variance | -1.36         |
| fps                | 353           |
| n_updates          | 1             |
| policy_entropy     | 1.7917289     |
| policy_loss        | -0.0003755391 |
| serial_timesteps   | 3000          |
| time_elapsed       | 2.15e-06      |
| total_timesteps    | 3000          |
| value_loss         | 0.41011703    |
--------------------------------------
---------------------------------------
| approxkl           | 1.1742088e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.126         |
| fps                | 371            |
| n_updates          | 2              |
| policy_entropy     | 1.7917004      |
| policy_loss        | -0.00021699152 |
| serial_timesteps   | 6000           |
| time_elapsed       | 8.5            |
| total_timesteps    | 6000           |
| value_loss         | 0.12803389     |
-------------

--------------------------------------
| approxkl           | 0.010030793   |
| clipfrac           | 0.14633334    |
| explained_variance | 0.309         |
| fps                | 372           |
| n_updates          | 11            |
| policy_entropy     | 1.5253892     |
| policy_loss        | -0.0039957566 |
| serial_timesteps   | 33000         |
| time_elapsed       | 82.1          |
| total_timesteps    | 33000         |
| value_loss         | 0.024109356   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0003816152  |
| clipfrac           | 0.0           |
| explained_variance | 0.329         |
| fps                | 366           |
| n_updates          | 12            |
| policy_entropy     | 1.519112      |
| policy_loss        | -0.0006795906 |
| serial_timesteps   | 36000         |
| time_elapsed       | 90.2          |
| total_timesteps    | 36000         |
| value_loss         | 0.023261458   |
-------------------------

--------------------------------------
| approxkl           | 0.0004053474  |
| clipfrac           | 0.0015833334  |
| explained_variance | 0.323         |
| fps                | 377           |
| n_updates          | 28            |
| policy_entropy     | 1.521735      |
| policy_loss        | -0.0011064657 |
| serial_timesteps   | 84000         |
| time_elapsed       | 219           |
| total_timesteps    | 84000         |
| value_loss         | 0.009235341   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0028949534 |
| clipfrac           | 0.040666666  |
| explained_variance | 0.35         |
| fps                | 369          |
| n_updates          | 29           |
| policy_entropy     | 1.5464658    |
| policy_loss        | -0.002377356 |
| serial_timesteps   | 87000        |
| time_elapsed       | 227          |
| total_timesteps    | 87000        |
| value_loss         | 0.01500466   |
-------------------------------------

--------------------------------------
| approxkl           | 0.0036450457  |
| clipfrac           | 0.013333334   |
| explained_variance | -0.0033       |
| fps                | 378           |
| n_updates          | 45            |
| policy_entropy     | 1.2458738     |
| policy_loss        | -0.0015922582 |
| serial_timesteps   | 135000        |
| time_elapsed       | 355           |
| total_timesteps    | 135000        |
| value_loss         | 0.0068804612  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0030414392   |
| clipfrac           | 0.003          |
| explained_variance | 0.0527         |
| fps                | 372            |
| n_updates          | 46             |
| policy_entropy     | 1.1121726      |
| policy_loss        | -0.00086532324 |
| serial_timesteps   | 138000         |
| time_elapsed       | 363            |
| total_timesteps    | 138000         |
| value_loss         | 0.010888878    |
-------------

-------------------------------------
| approxkl           | 0.0023472211 |
| clipfrac           | 0.04975      |
| explained_variance | 0.0528       |
| fps                | 378          |
| n_updates          | 62           |
| policy_entropy     | 1.1244104    |
| policy_loss        | -0.001800049 |
| serial_timesteps   | 186000       |
| time_elapsed       | 491          |
| total_timesteps    | 186000       |
| value_loss         | 0.008954672  |
-------------------------------------
-------------------------------------
| approxkl           | 0.005736028  |
| clipfrac           | 0.038083334  |
| explained_variance | 0.124        |
| fps                | 384          |
| n_updates          | 63           |
| policy_entropy     | 1.0848485    |
| policy_loss        | -0.001696678 |
| serial_timesteps   | 189000       |
| time_elapsed       | 499          |
| total_timesteps    | 189000       |
| value_loss         | 0.006801925  |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.0021673776  |
| clipfrac           | 0.028583333   |
| explained_variance | -0.0189       |
| fps                | 384           |
| n_updates          | 79            |
| policy_entropy     | 0.9946121     |
| policy_loss        | -0.0011510708 |
| serial_timesteps   | 237000        |
| time_elapsed       | 625           |
| total_timesteps    | 237000        |
| value_loss         | 0.009972574   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0025099185  |
| clipfrac           | 0.017666668   |
| explained_variance | 0.00935       |
| fps                | 378           |
| n_updates          | 80            |
| policy_entropy     | 1.0425627     |
| policy_loss        | -0.0005002644 |
| serial_timesteps   | 240000        |
| time_elapsed       | 633           |
| total_timesteps    | 240000        |
| value_loss         | 0.0071972003  |
-------------------------

--------------------------------------
| approxkl           | 0.0006426463  |
| clipfrac           | 0.01375       |
| explained_variance | 0.0275        |
| fps                | 379           |
| n_updates          | 96            |
| policy_entropy     | 0.67075896    |
| policy_loss        | -0.0006722514 |
| serial_timesteps   | 288000        |
| time_elapsed       | 758           |
| total_timesteps    | 288000        |
| value_loss         | 0.0054226844  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0039251964  |
| clipfrac           | 0.06541667    |
| explained_variance | -0.0119       |
| fps                | 386           |
| n_updates          | 97            |
| policy_entropy     | 0.60822475    |
| policy_loss        | -0.0022340142 |
| serial_timesteps   | 291000        |
| time_elapsed       | 766           |
| total_timesteps    | 291000        |
| value_loss         | 0.0070922766  |
-------------------------

--------------------------------------
| approxkl           | 0.0007879578  |
| clipfrac           | 0.0073333336  |
| explained_variance | -0.0155       |
| fps                | 387           |
| n_updates          | 113           |
| policy_entropy     | 0.64058936    |
| policy_loss        | -0.0006744877 |
| serial_timesteps   | 339000        |
| time_elapsed       | 892           |
| total_timesteps    | 339000        |
| value_loss         | 0.012507247   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0023640268 |
| clipfrac           | 0.020000001  |
| explained_variance | 0.0165       |
| fps                | 379          |
| n_updates          | 114          |
| policy_entropy     | 0.5832106    |
| policy_loss        | -0.001260861 |
| serial_timesteps   | 342000       |
| time_elapsed       | 900          |
| total_timesteps    | 342000       |
| value_loss         | 0.010219941  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0010495206  |
| clipfrac           | 0.0062500006  |
| explained_variance | 0.0406        |
| fps                | 376           |
| n_updates          | 130           |
| policy_entropy     | 0.6051827     |
| policy_loss        | -0.0008279291 |
| serial_timesteps   | 390000        |
| time_elapsed       | 1.03e+03      |
| total_timesteps    | 390000        |
| value_loss         | 0.006445604   |
--------------------------------------
---------------------------------------
| approxkl           | 0.001101424    |
| clipfrac           | 0.014833333    |
| explained_variance | 0.00884        |
| fps                | 387            |
| n_updates          | 131            |
| policy_entropy     | 0.504138       |
| policy_loss        | -0.00094237365 |
| serial_timesteps   | 393000         |
| time_elapsed       | 1.03e+03       |
| total_timesteps    | 393000         |
| value_loss         | 0.008856987    |
-------------

--------------------------------------
| approxkl           | 0.0018216778  |
| clipfrac           | 0.021499999   |
| explained_variance | 0.0684        |
| fps                | 350           |
| n_updates          | 147           |
| policy_entropy     | 0.4394646     |
| policy_loss        | -0.0005757624 |
| serial_timesteps   | 441000        |
| time_elapsed       | 1.18e+03      |
| total_timesteps    | 441000        |
| value_loss         | 0.009758121   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0011957068   |
| clipfrac           | 0.023500001    |
| explained_variance | 0.0622         |
| fps                | 343            |
| n_updates          | 148            |
| policy_entropy     | 0.50427705     |
| policy_loss        | -0.00077099656 |
| serial_timesteps   | 444000         |
| time_elapsed       | 1.18e+03       |
| total_timesteps    | 444000         |
| value_loss         | 0.010868093    |
-------------

--------------------------------------
| approxkl           | 0.0013907109  |
| clipfrac           | 0.010583334   |
| explained_variance | -0.0199       |
| fps                | 382           |
| n_updates          | 163           |
| policy_entropy     | 0.55612314    |
| policy_loss        | -0.0016753179 |
| serial_timesteps   | 489000        |
| time_elapsed       | 1.31e+03      |
| total_timesteps    | 489000        |
| value_loss         | 0.0108728735  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0016588814  |
| clipfrac           | 0.031833332   |
| explained_variance | -0.0444       |
| fps                | 375           |
| n_updates          | 164           |
| policy_entropy     | 0.58642983    |
| policy_loss        | -0.0015476604 |
| serial_timesteps   | 492000        |
| time_elapsed       | 1.31e+03      |
| total_timesteps    | 492000        |
| value_loss         | 0.009494085   |
-------------------------

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 1976.5861721038818
Training models/lesson2c




Loading a model without an environment, this model cannot be trained until it has a valid environment.


--------------------------------------
| approxkl           | 1.7227649e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.908        |
| fps                | 296           |
| n_updates          | 1             |
| policy_entropy     | 1.7917235     |
| policy_loss        | -0.0003747453 |
| serial_timesteps   | 3000          |
| time_elapsed       | 1.91e-06      |
| total_timesteps    | 3000          |
| value_loss         | 0.21672224    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0003309871  |
| clipfrac           | 0.0           |
| explained_variance | -0.632        |
| fps                | 346           |
| n_updates          | 2             |
| policy_entropy     | 1.7911837     |
| policy_loss        | -0.0017966597 |
| serial_timesteps   | 6000          |
| time_elapsed       | 10.1          |
| total_timesteps    | 6000          |
| value_loss         | 0.0075366073  |
-------------------------

--------------------------------------
| approxkl           | 0.0012003004  |
| clipfrac           | 0.0           |
| explained_variance | 0.325         |
| fps                | 354           |
| n_updates          | 9             |
| policy_entropy     | 1.5637541     |
| policy_loss        | -0.0014664484 |
| serial_timesteps   | 27000         |
| time_elapsed       | 69.6          |
| total_timesteps    | 27000         |
| value_loss         | 0.04168957    |
--------------------------------------
---------------------------------------
| approxkl           | 0.0002653964   |
| clipfrac           | 0.0            |
| explained_variance | 0.323          |
| fps                | 351            |
| n_updates          | 10             |
| policy_entropy     | 1.5975868      |
| policy_loss        | -1.8033143e-05 |
| serial_timesteps   | 30000          |
| time_elapsed       | 78.1           |
| total_timesteps    | 30000          |
| value_loss         | 0.023308355    |
-------------

--------------------------------------
| approxkl           | 0.00037069098 |
| clipfrac           | 0.0013333333  |
| explained_variance | 0.376         |
| fps                | 317           |
| n_updates          | 26            |
| policy_entropy     | 1.4459462     |
| policy_loss        | -0.000834145  |
| serial_timesteps   | 78000         |
| time_elapsed       | 216           |
| total_timesteps    | 78000         |
| value_loss         | 0.02388041    |
--------------------------------------
--------------------------------------
| approxkl           | 0.00092251034 |
| clipfrac           | 0.007583334   |
| explained_variance | 0.338         |
| fps                | 326           |
| n_updates          | 27            |
| policy_entropy     | 1.4920014     |
| policy_loss        | -0.003205016  |
| serial_timesteps   | 81000         |
| time_elapsed       | 225           |
| total_timesteps    | 81000         |
| value_loss         | 0.043532837   |
-------------------------

--------------------------------------
| approxkl           | 0.0015099562  |
| clipfrac           | 0.009333333   |
| explained_variance | 0.342         |
| fps                | 331           |
| n_updates          | 43            |
| policy_entropy     | 1.3160613     |
| policy_loss        | -0.0011247273 |
| serial_timesteps   | 129000        |
| time_elapsed       | 370           |
| total_timesteps    | 129000        |
| value_loss         | 0.029740347   |
--------------------------------------
--------------------------------------
| approxkl           | 0.002228839   |
| clipfrac           | 0.011166665   |
| explained_variance | 0.0134        |
| fps                | 336           |
| n_updates          | 44            |
| policy_entropy     | 1.330469      |
| policy_loss        | -0.0031646593 |
| serial_timesteps   | 132000        |
| time_elapsed       | 379           |
| total_timesteps    | 132000        |
| value_loss         | 0.02634051    |
-------------------------

--------------------------------------
| approxkl           | 0.0032444776  |
| clipfrac           | 0.018833334   |
| explained_variance | 0.313         |
| fps                | 365           |
| n_updates          | 60            |
| policy_entropy     | 1.1923486     |
| policy_loss        | -0.0015675842 |
| serial_timesteps   | 180000        |
| time_elapsed       | 515           |
| total_timesteps    | 180000        |
| value_loss         | 0.034706715   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0032432505  |
| clipfrac           | 0.035666667   |
| explained_variance | 0.28          |
| fps                | 373           |
| n_updates          | 61            |
| policy_entropy     | 1.202022      |
| policy_loss        | -0.0030855304 |
| serial_timesteps   | 183000        |
| time_elapsed       | 523           |
| total_timesteps    | 183000        |
| value_loss         | 0.02167025    |
-------------------------

--------------------------------------
| approxkl           | 0.004251699   |
| clipfrac           | 0.049333338   |
| explained_variance | 0.366         |
| fps                | 354           |
| n_updates          | 77            |
| policy_entropy     | 1.221332      |
| policy_loss        | -0.0030113726 |
| serial_timesteps   | 231000        |
| time_elapsed       | 654           |
| total_timesteps    | 231000        |
| value_loss         | 0.017053075   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0040914137  |
| clipfrac           | 0.041500002   |
| explained_variance | 0.461         |
| fps                | 361           |
| n_updates          | 78            |
| policy_entropy     | 1.1282164     |
| policy_loss        | -0.0032354048 |
| serial_timesteps   | 234000        |
| time_elapsed       | 662           |
| total_timesteps    | 234000        |
| value_loss         | 0.0173988     |
-------------------------

--------------------------------------
| approxkl           | 0.0071655433  |
| clipfrac           | 0.07191667    |
| explained_variance | 0.374         |
| fps                | 362           |
| n_updates          | 94            |
| policy_entropy     | 1.0976094     |
| policy_loss        | -0.0032460503 |
| serial_timesteps   | 282000        |
| time_elapsed       | 794           |
| total_timesteps    | 282000        |
| value_loss         | 0.024710504   |
--------------------------------------
--------------------------------------
| approxkl           | 0.008059341   |
| clipfrac           | 0.07258333    |
| explained_variance | 0.409         |
| fps                | 368           |
| n_updates          | 95            |
| policy_entropy     | 1.036468      |
| policy_loss        | -0.0048305998 |
| serial_timesteps   | 285000        |
| time_elapsed       | 802           |
| total_timesteps    | 285000        |
| value_loss         | 0.019427896   |
-------------------------

-------------------------------------
| approxkl           | 0.0070272437 |
| clipfrac           | 0.09741667   |
| explained_variance | 0.437        |
| fps                | 366          |
| n_updates          | 111          |
| policy_entropy     | 1.1044049    |
| policy_loss        | -0.006448725 |
| serial_timesteps   | 333000       |
| time_elapsed       | 936          |
| total_timesteps    | 333000       |
| value_loss         | 0.014140468  |
-------------------------------------
--------------------------------------
| approxkl           | 0.00546729    |
| clipfrac           | 0.0335        |
| explained_variance | 0.273         |
| fps                | 374           |
| n_updates          | 112           |
| policy_entropy     | 1.0427294     |
| policy_loss        | -0.0029905813 |
| serial_timesteps   | 336000        |
| time_elapsed       | 944           |
| total_timesteps    | 336000        |
| value_loss         | 0.030190945   |
--------------------------------------

--------------------------------------
| approxkl           | 0.005605165   |
| clipfrac           | 0.066583335   |
| explained_variance | 0.346         |
| fps                | 366           |
| n_updates          | 128           |
| policy_entropy     | 0.8196039     |
| policy_loss        | -0.0047717704 |
| serial_timesteps   | 384000        |
| time_elapsed       | 1.07e+03      |
| total_timesteps    | 384000        |
| value_loss         | 0.016158514   |
--------------------------------------
--------------------------------------
| approxkl           | 0.003412673   |
| clipfrac           | 0.028583335   |
| explained_variance | 0.489         |
| fps                | 373           |
| n_updates          | 129           |
| policy_entropy     | 0.8000409     |
| policy_loss        | -0.0032598472 |
| serial_timesteps   | 387000        |
| time_elapsed       | 1.08e+03      |
| total_timesteps    | 387000        |
| value_loss         | 0.0151894465  |
-------------------------

--------------------------------------
| approxkl           | 0.0041239494  |
| clipfrac           | 0.05008334    |
| explained_variance | 0.565         |
| fps                | 362           |
| n_updates          | 145           |
| policy_entropy     | 0.6677693     |
| policy_loss        | -0.0016507995 |
| serial_timesteps   | 435000        |
| time_elapsed       | 1.21e+03      |
| total_timesteps    | 435000        |
| value_loss         | 0.015431915   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0047836825  |
| clipfrac           | 0.06108334    |
| explained_variance | 0.306         |
| fps                | 368           |
| n_updates          | 146           |
| policy_entropy     | 0.7778126     |
| policy_loss        | -0.0040860036 |
| serial_timesteps   | 438000        |
| time_elapsed       | 1.22e+03      |
| total_timesteps    | 438000        |
| value_loss         | 0.021613665   |
-------------------------

-------------------------------------
| approxkl           | 0.0047406927 |
| clipfrac           | 0.050583333  |
| explained_variance | 0.456        |
| fps                | 373          |
| n_updates          | 162          |
| policy_entropy     | 0.77195776   |
| policy_loss        | -0.005079181 |
| serial_timesteps   | 486000       |
| time_elapsed       | 1.35e+03     |
| total_timesteps    | 486000       |
| value_loss         | 0.01709246   |
-------------------------------------
-------------------------------------
| approxkl           | 0.007138353  |
| clipfrac           | 0.077999994  |
| explained_variance | 0.381        |
| fps                | 369          |
| n_updates          | 163          |
| policy_entropy     | 0.8131922    |
| policy_loss        | -0.004593021 |
| serial_timesteps   | 489000       |
| time_elapsed       | 1.36e+03     |
| total_timesteps    | 489000       |
| value_loss         | 0.023813512  |
-------------------------------------
------------

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  18 / 20  games
Tie  1 / 20  games
Lose  1 / 20  games
1 epoch training time: 1534.7114624977112
Training models/lesson2c




Loading a model without an environment, this model cannot be trained until it has a valid environment.


---------------------------------------
| approxkl           | 4.4587423e-06  |
| clipfrac           | 0.0            |
| explained_variance | -0.386         |
| fps                | 360            |
| n_updates          | 1              |
| policy_entropy     | 1.7917305      |
| policy_loss        | -5.8009056e-05 |
| serial_timesteps   | 3000           |
| time_elapsed       | 2.62e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.213472       |
---------------------------------------
---------------------------------------
| approxkl           | 1.5058869e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.816         |
| fps                | 363            |
| n_updates          | 2              |
| policy_entropy     | 1.7916937      |
| policy_loss        | -0.00025658184 |
| serial_timesteps   | 6000           |
| time_elapsed       | 8.32           |
| total_timesteps    | 6000           |
| value_loss         | 0.027112715    |


--------------------------------------
| approxkl           | 0.0006857949  |
| clipfrac           | 0.0           |
| explained_variance | 0.219         |
| fps                | 367           |
| n_updates          | 9             |
| policy_entropy     | 1.6407148     |
| policy_loss        | -0.0013155397 |
| serial_timesteps   | 27000         |
| time_elapsed       | 66            |
| total_timesteps    | 27000         |
| value_loss         | 0.030314742   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00118349    |
| clipfrac           | 0.0           |
| explained_variance | 0.24          |
| fps                | 373           |
| n_updates          | 10            |
| policy_entropy     | 1.6175987     |
| policy_loss        | -0.0009213444 |
| serial_timesteps   | 30000         |
| time_elapsed       | 74.2          |
| total_timesteps    | 30000         |
| value_loss         | 0.03871164    |
-------------------------

--------------------------------------
| approxkl           | 0.0014950208  |
| clipfrac           | 0.0033333334  |
| explained_variance | 0.096         |
| fps                | 371           |
| n_updates          | 26            |
| policy_entropy     | 1.5394474     |
| policy_loss        | -0.0011272716 |
| serial_timesteps   | 78000         |
| time_elapsed       | 203           |
| total_timesteps    | 78000         |
| value_loss         | 0.015494416   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0024055974  |
| clipfrac           | 0.009416667   |
| explained_variance | 0.202         |
| fps                | 379           |
| n_updates          | 27            |
| policy_entropy     | 1.5214578     |
| policy_loss        | -0.0027431613 |
| serial_timesteps   | 81000         |
| time_elapsed       | 211           |
| total_timesteps    | 81000         |
| value_loss         | 0.01787447    |
-------------------------

--------------------------------------
| approxkl           | 0.0014616128  |
| clipfrac           | 0.01875       |
| explained_variance | 0.0454        |
| fps                | 373           |
| n_updates          | 43            |
| policy_entropy     | 1.1375691     |
| policy_loss        | -0.0014365963 |
| serial_timesteps   | 129000        |
| time_elapsed       | 339           |
| total_timesteps    | 129000        |
| value_loss         | 0.014905233   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00079541915 |
| clipfrac           | 0.008333334   |
| explained_variance | 0.0155        |
| fps                | 379           |
| n_updates          | 44            |
| policy_entropy     | 1.165091      |
| policy_loss        | -0.0005421757 |
| serial_timesteps   | 132000        |
| time_elapsed       | 347           |
| total_timesteps    | 132000        |
| value_loss         | 0.015385114   |
-------------------------

--------------------------------------
| approxkl           | 0.0017878497  |
| clipfrac           | 0.025         |
| explained_variance | 0.0886        |
| fps                | 379           |
| n_updates          | 60            |
| policy_entropy     | 1.1868477     |
| policy_loss        | -0.0008683629 |
| serial_timesteps   | 180000        |
| time_elapsed       | 474           |
| total_timesteps    | 180000        |
| value_loss         | 0.008027705   |
--------------------------------------
---------------------------------------
| approxkl           | 0.005310106    |
| clipfrac           | 0.040166665    |
| explained_variance | -0.00275       |
| fps                | 373            |
| n_updates          | 61             |
| policy_entropy     | 1.2015743      |
| policy_loss        | -0.00056420336 |
| serial_timesteps   | 183000         |
| time_elapsed       | 482            |
| total_timesteps    | 183000         |
| value_loss         | 0.0116197495   |
-------------

--------------------------------------
| approxkl           | 0.0012495496  |
| clipfrac           | 0.00083333335 |
| explained_variance | -0.0302       |
| fps                | 330           |
| n_updates          | 77            |
| policy_entropy     | 1.2506032     |
| policy_loss        | -0.0010646431 |
| serial_timesteps   | 231000        |
| time_elapsed       | 625           |
| total_timesteps    | 231000        |
| value_loss         | 0.009685555   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0018045484   |
| clipfrac           | 0.037499998    |
| explained_variance | 0.0562         |
| fps                | 332            |
| n_updates          | 78             |
| policy_entropy     | 1.2910086      |
| policy_loss        | -0.00065052015 |
| serial_timesteps   | 234000         |
| time_elapsed       | 634            |
| total_timesteps    | 234000         |
| value_loss         | 0.0069611524   |
-------------

--------------------------------------
| approxkl           | 0.003720721   |
| clipfrac           | 0.0165        |
| explained_variance | 0.0222        |
| fps                | 369           |
| n_updates          | 94            |
| policy_entropy     | 1.2032548     |
| policy_loss        | -0.0020502994 |
| serial_timesteps   | 282000        |
| time_elapsed       | 766           |
| total_timesteps    | 282000        |
| value_loss         | 0.016456753   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010889154  |
| clipfrac           | 0.01175       |
| explained_variance | 0.0157        |
| fps                | 378           |
| n_updates          | 95            |
| policy_entropy     | 1.0938365     |
| policy_loss        | -0.0010527656 |
| serial_timesteps   | 285000        |
| time_elapsed       | 775           |
| total_timesteps    | 285000        |
| value_loss         | 0.011827394   |
-------------------------

--------------------------------------
| approxkl           | 0.0006224152  |
| clipfrac           | 0.00024999998 |
| explained_variance | 0.047         |
| fps                | 371           |
| n_updates          | 111           |
| policy_entropy     | 1.295352      |
| policy_loss        | -0.0009847945 |
| serial_timesteps   | 333000        |
| time_elapsed       | 903           |
| total_timesteps    | 333000        |
| value_loss         | 0.015441532   |
--------------------------------------
--------------------------------------
| approxkl           | 0.002290379   |
| clipfrac           | 0.00425       |
| explained_variance | 0.00261       |
| fps                | 378           |
| n_updates          | 112           |
| policy_entropy     | 1.2625141     |
| policy_loss        | -0.0016648922 |
| serial_timesteps   | 336000        |
| time_elapsed       | 911           |
| total_timesteps    | 336000        |
| value_loss         | 0.011769107   |
-------------------------

---------------------------------------
| approxkl           | 0.002792992    |
| clipfrac           | 0.0            |
| explained_variance | -0.00363       |
| fps                | 371            |
| n_updates          | 128            |
| policy_entropy     | 1.3979934      |
| policy_loss        | -0.00043827528 |
| serial_timesteps   | 384000         |
| time_elapsed       | 1.04e+03       |
| total_timesteps    | 384000         |
| value_loss         | 0.008026866    |
---------------------------------------
---------------------------------------
| approxkl           | 0.00094511523  |
| clipfrac           | 0.0            |
| explained_variance | -0.0162        |
| fps                | 366            |
| n_updates          | 129            |
| policy_entropy     | 1.3889549      |
| policy_loss        | -0.00084018067 |
| serial_timesteps   | 387000         |
| time_elapsed       | 1.05e+03       |
| total_timesteps    | 387000         |
| value_loss         | 0.0080468925   |


--------------------------------------
| approxkl           | 0.002502463   |
| clipfrac           | 0.022416666   |
| explained_variance | 0.00626       |
| fps                | 380           |
| n_updates          | 145           |
| policy_entropy     | 1.4389042     |
| policy_loss        | -0.0017391079 |
| serial_timesteps   | 435000        |
| time_elapsed       | 1.18e+03      |
| total_timesteps    | 435000        |
| value_loss         | 0.008211542   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0033775934  |
| clipfrac           | 0.0020833334  |
| explained_variance | 0.0212        |
| fps                | 371           |
| n_updates          | 146           |
| policy_entropy     | 1.45835       |
| policy_loss        | -0.0021166324 |
| serial_timesteps   | 438000        |
| time_elapsed       | 1.19e+03      |
| total_timesteps    | 438000        |
| value_loss         | 0.008578981   |
-------------------------

--------------------------------------
| approxkl           | 0.002360718   |
| clipfrac           | 0.033833336   |
| explained_variance | -0.0119       |
| fps                | 379           |
| n_updates          | 162           |
| policy_entropy     | 1.2619606     |
| policy_loss        | -0.0013468806 |
| serial_timesteps   | 486000        |
| time_elapsed       | 1.31e+03      |
| total_timesteps    | 486000        |
| value_loss         | 0.014260417   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0032248653  |
| clipfrac           | 0.031416666   |
| explained_variance | -0.0153       |
| fps                | 374           |
| n_updates          | 163           |
| policy_entropy     | 1.246695      |
| policy_loss        | -0.0021761581 |
| serial_timesteps   | 489000        |
| time_elapsed       | 1.32e+03      |
| total_timesteps    | 489000        |
| value_loss         | 0.0117236655  |
-------------------------

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 1991.0437941551208
Training models/lesson2c




Loading a model without an environment, this model cannot be trained until it has a valid environment.


--------------------------------------
| approxkl           | 1.5289585e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.238        |
| fps                | 353           |
| n_updates          | 1             |
| policy_entropy     | 1.7917207     |
| policy_loss        | -0.0003677368 |
| serial_timesteps   | 3000          |
| time_elapsed       | 1.91e-06      |
| total_timesteps    | 3000          |
| value_loss         | 0.40715152    |
--------------------------------------
---------------------------------------
| approxkl           | 3.366681e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.224         |
| fps                | 370            |
| n_updates          | 2              |
| policy_entropy     | 1.7915962      |
| policy_loss        | -0.00064390345 |
| serial_timesteps   | 6000           |
| time_elapsed       | 8.49           |
| total_timesteps    | 6000           |
| value_loss         | 0.13337101     |
-------------

--------------------------------------
| approxkl           | 0.0020564033  |
| clipfrac           | 0.0125        |
| explained_variance | -0.0236       |
| fps                | 366           |
| n_updates          | 7             |
| policy_entropy     | 1.7068461     |
| policy_loss        | -0.0022780392 |
| serial_timesteps   | 21000         |
| time_elapsed       | 49.3          |
| total_timesteps    | 21000         |
| value_loss         | 0.024181731   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0022558742  |
| clipfrac           | 0.017166667   |
| explained_variance | -0.027        |
| fps                | 374           |
| n_updates          | 8             |
| policy_entropy     | 1.668661      |
| policy_loss        | -0.0035193719 |
| serial_timesteps   | 24000         |
| time_elapsed       | 57.5          |
| total_timesteps    | 24000         |
| value_loss         | 0.038211044   |
-------------------------

---------------------------------------
| approxkl           | 0.0015761881   |
| clipfrac           | 0.0            |
| explained_variance | 0.11           |
| fps                | 372            |
| n_updates          | 24             |
| policy_entropy     | 1.5439472      |
| policy_loss        | -0.00081273395 |
| serial_timesteps   | 72000          |
| time_elapsed       | 186            |
| total_timesteps    | 72000          |
| value_loss         | 0.009581163    |
---------------------------------------
--------------------------------------
| approxkl           | 0.000958747   |
| clipfrac           | 0.0005833333  |
| explained_variance | 0.131         |
| fps                | 378           |
| n_updates          | 25            |
| policy_entropy     | 1.5490426     |
| policy_loss        | -0.0017220408 |
| serial_timesteps   | 75000         |
| time_elapsed       | 194           |
| total_timesteps    | 75000         |
| value_loss         | 0.008552564   |
------------

--------------------------------------
| approxkl           | 0.0032851433  |
| clipfrac           | 0.04625       |
| explained_variance | 0.0722        |
| fps                | 368           |
| n_updates          | 41            |
| policy_entropy     | 1.4739621     |
| policy_loss        | -0.0021117865 |
| serial_timesteps   | 123000        |
| time_elapsed       | 322           |
| total_timesteps    | 123000        |
| value_loss         | 0.009077436   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00048979156 |
| clipfrac           | 0.0           |
| explained_variance | 0.133         |
| fps                | 379           |
| n_updates          | 42            |
| policy_entropy     | 1.510565      |
| policy_loss        | 0.0003525829  |
| serial_timesteps   | 126000        |
| time_elapsed       | 330           |
| total_timesteps    | 126000        |
| value_loss         | 0.009123251   |
-------------------------

--------------------------------------
| approxkl           | 0.006437896   |
| clipfrac           | 0.13508333    |
| explained_variance | -0.0166       |
| fps                | 371           |
| n_updates          | 58            |
| policy_entropy     | 1.4582086     |
| policy_loss        | -0.0032036724 |
| serial_timesteps   | 174000        |
| time_elapsed       | 458           |
| total_timesteps    | 174000        |
| value_loss         | 0.010588939   |
--------------------------------------
--------------------------------------
| approxkl           | 0.005714491   |
| clipfrac           | 0.05866667    |
| explained_variance | 0.0203        |
| fps                | 380           |
| n_updates          | 59            |
| policy_entropy     | 1.4451978     |
| policy_loss        | -0.0035206713 |
| serial_timesteps   | 177000        |
| time_elapsed       | 467           |
| total_timesteps    | 177000        |
| value_loss         | 0.012153356   |
-------------------------

--------------------------------------
| approxkl           | 0.005781364   |
| clipfrac           | 0.07275       |
| explained_variance | 0.139         |
| fps                | 377           |
| n_updates          | 75            |
| policy_entropy     | 1.4710581     |
| policy_loss        | -0.0049553984 |
| serial_timesteps   | 225000        |
| time_elapsed       | 595           |
| total_timesteps    | 225000        |
| value_loss         | 0.007664488   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0006139469 |
| clipfrac           | 0.0          |
| explained_variance | 0.0416       |
| fps                | 373          |
| n_updates          | 76           |
| policy_entropy     | 1.4582965    |
| policy_loss        | 7.578575e-05 |
| serial_timesteps   | 228000       |
| time_elapsed       | 602          |
| total_timesteps    | 228000       |
| value_loss         | 0.010343215  |
-------------------------------------

-------------------------------------
| approxkl           | 0.0029717886 |
| clipfrac           | 0.057166666  |
| explained_variance | 0.019        |
| fps                | 346          |
| n_updates          | 92           |
| policy_entropy     | 1.1814978    |
| policy_loss        | -0.002073117 |
| serial_timesteps   | 276000       |
| time_elapsed       | 741          |
| total_timesteps    | 276000       |
| value_loss         | 0.009554355  |
-------------------------------------
--------------------------------------
| approxkl           | 0.00254406    |
| clipfrac           | 0.007833334   |
| explained_variance | -0.0131       |
| fps                | 339           |
| n_updates          | 93            |
| policy_entropy     | 1.1072938     |
| policy_loss        | -0.0012749535 |
| serial_timesteps   | 279000        |
| time_elapsed       | 750           |
| total_timesteps    | 279000        |
| value_loss         | 0.011440034   |
--------------------------------------

--------------------------------------
| approxkl           | 0.0029884302  |
| clipfrac           | 0.03491667    |
| explained_variance | -0.0169       |
| fps                | 320           |
| n_updates          | 109           |
| policy_entropy     | 1.0743297     |
| policy_loss        | -0.0005472568 |
| serial_timesteps   | 327000        |
| time_elapsed       | 895           |
| total_timesteps    | 327000        |
| value_loss         | 0.013707835   |
--------------------------------------
-------------------------------------
| approxkl           | 0.000373156  |
| clipfrac           | 0.0          |
| explained_variance | 0.0171       |
| fps                | 330          |
| n_updates          | 110          |
| policy_entropy     | 1.0895448    |
| policy_loss        | -0.000294878 |
| serial_timesteps   | 330000       |
| time_elapsed       | 905          |
| total_timesteps    | 330000       |
| value_loss         | 0.0064791846 |
-------------------------------------

---------------------------------------
| approxkl           | 0.00086795137  |
| clipfrac           | 0.0            |
| explained_variance | -0.00347       |
| fps                | 340            |
| n_updates          | 126            |
| policy_entropy     | 1.0630404      |
| policy_loss        | -0.00047651178 |
| serial_timesteps   | 378000         |
| time_elapsed       | 1.05e+03       |
| total_timesteps    | 378000         |
| value_loss         | 0.007484794    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0020957235  |
| clipfrac           | 0.0           |
| explained_variance | -0.0102       |
| fps                | 340           |
| n_updates          | 127           |
| policy_entropy     | 1.0735283     |
| policy_loss        | -0.0006574303 |
| serial_timesteps   | 381000        |
| time_elapsed       | 1.06e+03      |
| total_timesteps    | 381000        |
| value_loss         | 0.007579741   |
------------

--------------------------------------
| approxkl           | 0.000777905   |
| clipfrac           | 0.0           |
| explained_variance | -0.00406      |
| fps                | 329           |
| n_updates          | 143           |
| policy_entropy     | 0.80179954    |
| policy_loss        | -5.713661e-05 |
| serial_timesteps   | 429000        |
| time_elapsed       | 1.19e+03      |
| total_timesteps    | 429000        |
| value_loss         | 0.008350667   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0019980269  |
| clipfrac           | 0.020333335   |
| explained_variance | 0.00126       |
| fps                | 344           |
| n_updates          | 144           |
| policy_entropy     | 0.86388886    |
| policy_loss        | -0.0009862789 |
| serial_timesteps   | 432000        |
| time_elapsed       | 1.2e+03       |
| total_timesteps    | 432000        |
| value_loss         | 0.007613913   |
-------------------------

--------------------------------------
| approxkl           | 0.0003689018  |
| clipfrac           | 0.0           |
| explained_variance | -0.0154       |
| fps                | 340           |
| n_updates          | 159           |
| policy_entropy     | 0.5745129     |
| policy_loss        | -0.0001499992 |
| serial_timesteps   | 477000        |
| time_elapsed       | 1.34e+03      |
| total_timesteps    | 477000        |
| value_loss         | 0.010509806   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0016754356   |
| clipfrac           | 0.030416667    |
| explained_variance | -0.00699       |
| fps                | 338            |
| n_updates          | 160            |
| policy_entropy     | 0.5783698      |
| policy_loss        | -0.00083970866 |
| serial_timesteps   | 480000         |
| time_elapsed       | 1.34e+03       |
| total_timesteps    | 480000         |
| value_loss         | 0.011807907    |
-------------

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished


Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 2037.2884848117828
Training models/lesson2c


Loading a model without an environment, this model cannot be trained until it has a valid environment.


---------------------------------------
| approxkl           | 1.16628635e-05 |
| clipfrac           | 0.0            |
| explained_variance | -1.05          |
| fps                | 350            |
| n_updates          | 1              |
| policy_entropy     | 1.791727       |
| policy_loss        | -0.00018356151 |
| serial_timesteps   | 3000           |
| time_elapsed       | 2.15e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.19768052     |
---------------------------------------
---------------------------------------
| approxkl           | 3.428996e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.734         |
| fps                | 361            |
| n_updates          | 2              |
| policy_entropy     | 1.7916481      |
| policy_loss        | -0.00045077375 |
| serial_timesteps   | 6000           |
| time_elapsed       | 8.56           |
| total_timesteps    | 6000           |
| value_loss         | 0.011497382    |


--------------------------------------
| approxkl           | 0.0019159673  |
| clipfrac           | 0.0           |
| explained_variance | 0.0217        |
| fps                | 360           |
| n_updates          | 4             |
| policy_entropy     | 1.7842209     |
| policy_loss        | -0.0035034902 |
| serial_timesteps   | 12000         |
| time_elapsed       | 25.3          |
| total_timesteps    | 12000         |
| value_loss         | 0.015749672   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0037263478 |
| clipfrac           | 0.0050833337 |
| explained_variance | -0.518       |
| fps                | 359          |
| n_updates          | 5            |
| policy_entropy     | 1.757714     |
| policy_loss        | -0.002311876 |
| serial_timesteps   | 15000        |
| time_elapsed       | 33.6         |
| total_timesteps    | 15000        |
| value_loss         | 0.006705081  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0012871153  |
| clipfrac           | 0.003         |
| explained_variance | 0.27          |
| fps                | 325           |
| n_updates          | 21            |
| policy_entropy     | 1.6443051     |
| policy_loss        | -0.0010640459 |
| serial_timesteps   | 63000         |
| time_elapsed       | 169           |
| total_timesteps    | 63000         |
| value_loss         | 0.028921606   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00038014582 |
| clipfrac           | 0.0           |
| explained_variance | 0.398         |
| fps                | 307           |
| n_updates          | 22            |
| policy_entropy     | 1.6601717     |
| policy_loss        | -0.0009299244 |
| serial_timesteps   | 66000         |
| time_elapsed       | 179           |
| total_timesteps    | 66000         |
| value_loss         | 0.029700661   |
-------------------------

--------------------------------------
| approxkl           | 0.0033750553  |
| clipfrac           | 0.017166667   |
| explained_variance | 0.103         |
| fps                | 349           |
| n_updates          | 38            |
| policy_entropy     | 1.3242629     |
| policy_loss        | -0.0024177947 |
| serial_timesteps   | 114000        |
| time_elapsed       | 328           |
| total_timesteps    | 114000        |
| value_loss         | 0.025001926   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0022683532 |
| clipfrac           | 0.014083333  |
| explained_variance | 0.124        |
| fps                | 365          |
| n_updates          | 39           |
| policy_entropy     | 1.2280155    |
| policy_loss        | -0.002118164 |
| serial_timesteps   | 117000       |
| time_elapsed       | 336          |
| total_timesteps    | 117000       |
| value_loss         | 0.025118515  |
-------------------------------------

--------------------------------------
| approxkl           | 0.002751435   |
| clipfrac           | 0.021416666   |
| explained_variance | 0.251         |
| fps                | 330           |
| n_updates          | 55            |
| policy_entropy     | 1.1987675     |
| policy_loss        | -0.0028375648 |
| serial_timesteps   | 165000        |
| time_elapsed       | 479           |
| total_timesteps    | 165000        |
| value_loss         | 0.018315807   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0059409556  |
| clipfrac           | 0.06725       |
| explained_variance | 0.191         |
| fps                | 320           |
| n_updates          | 56            |
| policy_entropy     | 1.2506578     |
| policy_loss        | -0.0037805364 |
| serial_timesteps   | 168000        |
| time_elapsed       | 488           |
| total_timesteps    | 168000        |
| value_loss         | 0.018487774   |
-------------------------

--------------------------------------
| approxkl           | 0.0073654014  |
| clipfrac           | 0.0695        |
| explained_variance | 0.14          |
| fps                | 202           |
| n_updates          | 72            |
| policy_entropy     | 1.1736617     |
| policy_loss        | -0.0071824356 |
| serial_timesteps   | 216000        |
| time_elapsed       | 649           |
| total_timesteps    | 216000        |
| value_loss         | 0.02024711    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0016070787  |
| clipfrac           | 0.013916668   |
| explained_variance | 0.257         |
| fps                | 245           |
| n_updates          | 73            |
| policy_entropy     | 1.1875308     |
| policy_loss        | -0.0020587158 |
| serial_timesteps   | 219000        |
| time_elapsed       | 664           |
| total_timesteps    | 219000        |
| value_loss         | 0.01590755    |
-------------------------

--------------------------------------
| approxkl           | 0.005823447   |
| clipfrac           | 0.027333334   |
| explained_variance | 0.274         |
| fps                | 225           |
| n_updates          | 89            |
| policy_entropy     | 1.122401      |
| policy_loss        | -0.0033886388 |
| serial_timesteps   | 267000        |
| time_elapsed       | 880           |
| total_timesteps    | 267000        |
| value_loss         | 0.018951716   |
--------------------------------------
-------------------------------------
| approxkl           | 0.006204293  |
| clipfrac           | 0.052249998  |
| explained_variance | 0.419        |
| fps                | 313          |
| n_updates          | 90           |
| policy_entropy     | 1.1233674    |
| policy_loss        | -0.005812764 |
| serial_timesteps   | 270000       |
| time_elapsed       | 894          |
| total_timesteps    | 270000       |
| value_loss         | 0.019666113  |
-------------------------------------

--------------------------------------
| approxkl           | 0.0063504027  |
| clipfrac           | 0.094500005   |
| explained_variance | 0.316         |
| fps                | 376           |
| n_updates          | 106           |
| policy_entropy     | 1.0649759     |
| policy_loss        | -0.0039867116 |
| serial_timesteps   | 318000        |
| time_elapsed       | 1.03e+03      |
| total_timesteps    | 318000        |
| value_loss         | 0.024106717   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0047549033  |
| clipfrac           | 0.040416665   |
| explained_variance | 0.338         |
| fps                | 370           |
| n_updates          | 107           |
| policy_entropy     | 0.9354617     |
| policy_loss        | -0.0028091415 |
| serial_timesteps   | 321000        |
| time_elapsed       | 1.04e+03      |
| total_timesteps    | 321000        |
| value_loss         | 0.02317047    |
-------------------------

--------------------------------------
| approxkl           | 0.0071154027  |
| clipfrac           | 0.082666665   |
| explained_variance | 0.312         |
| fps                | 370           |
| n_updates          | 123           |
| policy_entropy     | 0.81055087    |
| policy_loss        | -0.0057878233 |
| serial_timesteps   | 369000        |
| time_elapsed       | 1.17e+03      |
| total_timesteps    | 369000        |
| value_loss         | 0.020720618   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0031869079  |
| clipfrac           | 0.031916667   |
| explained_variance | 0.347         |
| fps                | 367           |
| n_updates          | 124           |
| policy_entropy     | 0.82175684    |
| policy_loss        | -0.0037675325 |
| serial_timesteps   | 372000        |
| time_elapsed       | 1.17e+03      |
| total_timesteps    | 372000        |
| value_loss         | 0.016164992   |
-------------------------

--------------------------------------
| approxkl           | 0.0042228494  |
| clipfrac           | 0.041083336   |
| explained_variance | 0.328         |
| fps                | 375           |
| n_updates          | 140           |
| policy_entropy     | 0.8578416     |
| policy_loss        | -0.0022450553 |
| serial_timesteps   | 420000        |
| time_elapsed       | 1.31e+03      |
| total_timesteps    | 420000        |
| value_loss         | 0.014299161   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00367498    |
| clipfrac           | 0.040249996   |
| explained_variance | 0.361         |
| fps                | 369           |
| n_updates          | 141           |
| policy_entropy     | 0.8124952     |
| policy_loss        | -0.0029842432 |
| serial_timesteps   | 423000        |
| time_elapsed       | 1.31e+03      |
| total_timesteps    | 423000        |
| value_loss         | 0.021507777   |
-------------------------

-------------------------------------
| approxkl           | 0.005642077  |
| clipfrac           | 0.06341667   |
| explained_variance | 0.501        |
| fps                | 369          |
| n_updates          | 157          |
| policy_entropy     | 0.81801623   |
| policy_loss        | -0.003987565 |
| serial_timesteps   | 471000       |
| time_elapsed       | 1.45e+03     |
| total_timesteps    | 471000       |
| value_loss         | 0.017685825  |
-------------------------------------
--------------------------------------
| approxkl           | 0.006191208   |
| clipfrac           | 0.057166673   |
| explained_variance | 0.517         |
| fps                | 316           |
| n_updates          | 158           |
| policy_entropy     | 0.7459263     |
| policy_loss        | -0.0054181213 |
| serial_timesteps   | 474000        |
| time_elapsed       | 1.46e+03      |
| total_timesteps    | 474000        |
| value_loss         | 0.014103709   |
--------------------------------------



Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  20 / 20  games
Tie  0 / 20  games
Lose  0 / 20  games
1 epoch training time: 1658.9363100528717
Training models/lesson2d


---------------------------------------
| approxkl           | 1.6022812e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.11          |
| fps                | 185            |
| n_updates          | 1              |
| policy_entropy     | 1.7917043      |
| policy_loss        | -0.00048320426 |
| serial_timesteps   | 3000           |
| time_elapsed       | 3.1e-06        |
| total_timesteps    | 3000           |
| value_loss         | 0.28994563     |
---------------------------------------
--------------------------------------
| approxkl           | 0.00012026212 |
| clipfrac           | 0.0           |
| explained_variance | -0.0614       |
| fps                | 178           |
| n_updates          | 2             |
| policy_entropy     | 1.7912914     |
| policy_loss        | -0.001267747  |
| serial_timesteps   | 6000          |
| time_elapsed       | 16.2          |
| total_timesteps    | 6000          |
| value_loss         | 0.11588064    |
------------

---------------------------------------
| approxkl           | 0.0003698916   |
| clipfrac           | 0.0            |
| explained_variance | 0.28           |
| fps                | 195            |
| n_updates          | 10             |
| policy_entropy     | 1.5581592      |
| policy_loss        | -0.00021429287 |
| serial_timesteps   | 30000          |
| time_elapsed       | 144            |
| total_timesteps    | 30000          |
| value_loss         | 0.0476596      |
---------------------------------------
--------------------------------------
| approxkl           | 0.003399734   |
| clipfrac           | 0.04883333    |
| explained_variance | 0.307         |
| fps                | 194           |
| n_updates          | 11            |
| policy_entropy     | 1.616323      |
| policy_loss        | -0.0037788642 |
| serial_timesteps   | 33000         |
| time_elapsed       | 160           |
| total_timesteps    | 33000         |
| value_loss         | 0.040199164   |
------------

-------------------------------------
| approxkl           | 0.002132372  |
| clipfrac           | 0.009333333  |
| explained_variance | 0.281        |
| fps                | 220          |
| n_updates          | 27           |
| policy_entropy     | 1.3563982    |
| policy_loss        | -0.005362411 |
| serial_timesteps   | 81000        |
| time_elapsed       | 382          |
| total_timesteps    | 81000        |
| value_loss         | 0.03428496   |
-------------------------------------
--------------------------------------
| approxkl           | 0.0023621328  |
| clipfrac           | 0.015         |
| explained_variance | 0.247         |
| fps                | 348           |
| n_updates          | 28            |
| policy_entropy     | 1.3601769     |
| policy_loss        | -0.0039845672 |
| serial_timesteps   | 84000         |
| time_elapsed       | 395           |
| total_timesteps    | 84000         |
| value_loss         | 0.045990773   |
--------------------------------------

--------------------------------------
| approxkl           | 0.005198163   |
| clipfrac           | 0.04658334    |
| explained_variance | 0.312         |
| fps                | 267           |
| n_updates          | 44            |
| policy_entropy     | 0.84165174    |
| policy_loss        | -0.0050598653 |
| serial_timesteps   | 132000        |
| time_elapsed       | 572           |
| total_timesteps    | 132000        |
| value_loss         | 0.01896399    |
--------------------------------------
--------------------------------------
| approxkl           | 0.002708747   |
| clipfrac           | 0.026416669   |
| explained_variance | 0.217         |
| fps                | 278           |
| n_updates          | 45            |
| policy_entropy     | 0.8427574     |
| policy_loss        | -0.0026967837 |
| serial_timesteps   | 135000        |
| time_elapsed       | 583           |
| total_timesteps    | 135000        |
| value_loss         | 0.02684486    |
-------------------------

--------------------------------------
| approxkl           | 0.014884175   |
| clipfrac           | 0.12891667    |
| explained_variance | 0.519         |
| fps                | 288           |
| n_updates          | 61            |
| policy_entropy     | 0.69945836    |
| policy_loss        | -0.0043860665 |
| serial_timesteps   | 183000        |
| time_elapsed       | 753           |
| total_timesteps    | 183000        |
| value_loss         | 0.0061847367  |
--------------------------------------
--------------------------------------
| approxkl           | 0.003418072   |
| clipfrac           | 0.033916667   |
| explained_variance | 0.0751        |
| fps                | 277           |
| n_updates          | 62            |
| policy_entropy     | 0.6738801     |
| policy_loss        | -0.0016755007 |
| serial_timesteps   | 186000        |
| time_elapsed       | 764           |
| total_timesteps    | 186000        |
| value_loss         | 0.0041763154  |
-------------------------

--------------------------------------
| approxkl           | 0.0037092343  |
| clipfrac           | 0.05316667    |
| explained_variance | -4.48         |
| fps                | 327           |
| n_updates          | 78            |
| policy_entropy     | 0.5646545     |
| policy_loss        | -0.0017694978 |
| serial_timesteps   | 234000        |
| time_elapsed       | 933           |
| total_timesteps    | 234000        |
| value_loss         | 0.004749191   |
--------------------------------------
--------------------------------------
| approxkl           | 0.007147401   |
| clipfrac           | 0.049166664   |
| explained_variance | 0.0857        |
| fps                | 315           |
| n_updates          | 79            |
| policy_entropy     | 0.5740659     |
| policy_loss        | -0.0041135107 |
| serial_timesteps   | 237000        |
| time_elapsed       | 942           |
| total_timesteps    | 237000        |
| value_loss         | 0.009685022   |
-------------------------

--------------------------------------
| approxkl           | 0.005137942   |
| clipfrac           | 0.06625       |
| explained_variance | 0.832         |
| fps                | 327           |
| n_updates          | 95            |
| policy_entropy     | 0.42995754    |
| policy_loss        | -0.0016728992 |
| serial_timesteps   | 285000        |
| time_elapsed       | 1.09e+03      |
| total_timesteps    | 285000        |
| value_loss         | 0.0030989656  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0050703567  |
| clipfrac           | 0.05475       |
| explained_variance | 0.81          |
| fps                | 340           |
| n_updates          | 96            |
| policy_entropy     | 0.42616707    |
| policy_loss        | -0.0018760115 |
| serial_timesteps   | 288000        |
| time_elapsed       | 1.1e+03       |
| total_timesteps    | 288000        |
| value_loss         | 0.0013312511  |
-------------------------

--------------------------------------
| approxkl           | 0.0022026526  |
| clipfrac           | 0.028250001   |
| explained_variance | -3.42         |
| fps                | 326           |
| n_updates          | 112           |
| policy_entropy     | 0.36901826    |
| policy_loss        | -0.0014666145 |
| serial_timesteps   | 336000        |
| time_elapsed       | 1.25e+03      |
| total_timesteps    | 336000        |
| value_loss         | 0.006994773   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0032988465  |
| clipfrac           | 0.032833334   |
| explained_variance | -3.82         |
| fps                | 341           |
| n_updates          | 113           |
| policy_entropy     | 0.36079606    |
| policy_loss        | -0.0028857407 |
| serial_timesteps   | 339000        |
| time_elapsed       | 1.26e+03      |
| total_timesteps    | 339000        |
| value_loss         | 0.0021330414  |
-------------------------

-------------------------------------
| approxkl           | 0.005163947  |
| clipfrac           | 0.07633334   |
| explained_variance | -1.18        |
| fps                | 327          |
| n_updates          | 129          |
| policy_entropy     | 0.32078755   |
| policy_loss        | -0.002952212 |
| serial_timesteps   | 387000       |
| time_elapsed       | 1.4e+03      |
| total_timesteps    | 387000       |
| value_loss         | 0.0016883325 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0062010726  |
| clipfrac           | 0.07725       |
| explained_variance | 0.497         |
| fps                | 340           |
| n_updates          | 130           |
| policy_entropy     | 0.3535354     |
| policy_loss        | -0.0013317874 |
| serial_timesteps   | 390000        |
| time_elapsed       | 1.41e+03      |
| total_timesteps    | 390000        |
| value_loss         | 0.0010352212  |
--------------------------------------

-------------------------------------
| approxkl           | 0.0020293328 |
| clipfrac           | 0.025083331  |
| explained_variance | -5.4         |
| fps                | 334          |
| n_updates          | 146          |
| policy_entropy     | 0.34149843   |
| policy_loss        | -0.001784988 |
| serial_timesteps   | 438000       |
| time_elapsed       | 1.55e+03     |
| total_timesteps    | 438000       |
| value_loss         | 0.0068542487 |
-------------------------------------
--------------------------------------
| approxkl           | 0.003983439   |
| clipfrac           | 0.06266666    |
| explained_variance | 0.00867       |
| fps                | 340           |
| n_updates          | 147           |
| policy_entropy     | 0.33767474    |
| policy_loss        | -0.0009799434 |
| serial_timesteps   | 441000        |
| time_elapsed       | 1.56e+03      |
| total_timesteps    | 441000        |
| value_loss         | 0.0061342823  |
--------------------------------------

-------------------------------------
| approxkl           | 0.008090894  |
| clipfrac           | 0.079333335  |
| explained_variance | 0.883        |
| fps                | 321          |
| n_updates          | 163          |
| policy_entropy     | 0.31490886   |
| policy_loss        | 0.0044526486 |
| serial_timesteps   | 489000       |
| time_elapsed       | 1.7e+03      |
| total_timesteps    | 489000       |
| value_loss         | 0.0018824325 |
-------------------------------------
--------------------------------------
| approxkl           | 0.004021501   |
| clipfrac           | 0.044583336   |
| explained_variance | 0.579         |
| fps                | 358           |
| n_updates          | 164           |
| policy_entropy     | 0.34491825    |
| policy_loss        | -0.0008408964 |
| serial_timesteps   | 492000        |
| time_elapsed       | 1.71e+03      |
| total_timesteps    | 492000        |
| value_loss         | 0.0021693467  |
--------------------------------------

Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  20 / 20  games
Tie  0 / 20  games
Lose  0 / 20  games
1 epoch training time: 1791.5066192150116
Training models/lesson2e




---------------------------------------
| approxkl           | 7.616801e-06   |
| clipfrac           | 0.0            |
| explained_variance | -2.42          |
| fps                | 326            |
| n_updates          | 1              |
| policy_entropy     | 1.7917337      |
| policy_loss        | -0.00013641757 |
| serial_timesteps   | 3000           |
| time_elapsed       | 1.91e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.3558254      |
---------------------------------------
---------------------------------------
| approxkl           | 2.8722821e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.245         |
| fps                | 366            |
| n_updates          | 2              |
| policy_entropy     | 1.7916448      |
| policy_loss        | -0.00042236506 |
| serial_timesteps   | 6000           |
| time_elapsed       | 9.2            |
| total_timesteps    | 6000           |
| value_loss         | 0.034044612    |


--------------------------------------
| approxkl           | 0.0022577809  |
| clipfrac           | 0.0041666664  |
| explained_variance | 0.103         |
| fps                | 361           |
| n_updates          | 16            |
| policy_entropy     | 1.4430704     |
| policy_loss        | -0.0012121752 |
| serial_timesteps   | 48000         |
| time_elapsed       | 126           |
| total_timesteps    | 48000         |
| value_loss         | 0.006549655   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0026552563  |
| clipfrac           | 0.0063333334  |
| explained_variance | 0.139         |
| fps                | 363           |
| n_updates          | 17            |
| policy_entropy     | 1.3786236     |
| policy_loss        | -0.0016046569 |
| serial_timesteps   | 51000         |
| time_elapsed       | 135           |
| total_timesteps    | 51000         |
| value_loss         | 0.022937864   |
-------------------------

---------------------------------------
| approxkl           | 0.0011260578   |
| clipfrac           | 0.00016666666  |
| explained_variance | 0.0236         |
| fps                | 358            |
| n_updates          | 33             |
| policy_entropy     | 1.4685467      |
| policy_loss        | -1.9655727e-05 |
| serial_timesteps   | 99000          |
| time_elapsed       | 267            |
| total_timesteps    | 99000          |
| value_loss         | 0.016151113    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0011536772  |
| clipfrac           | 0.00016666666 |
| explained_variance | 0.064         |
| fps                | 340           |
| n_updates          | 34            |
| policy_entropy     | 1.4444449     |
| policy_loss        | -0.0009538466 |
| serial_timesteps   | 102000        |
| time_elapsed       | 275           |
| total_timesteps    | 102000        |
| value_loss         | 0.007160855   |
------------

--------------------------------------
| approxkl           | 0.00066480244 |
| clipfrac           | 0.0           |
| explained_variance | -0.0305       |
| fps                | 361           |
| n_updates          | 50            |
| policy_entropy     | 1.4623702     |
| policy_loss        | -0.0006040459 |
| serial_timesteps   | 150000        |
| time_elapsed       | 414           |
| total_timesteps    | 150000        |
| value_loss         | 0.013220202   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0016541269   |
| clipfrac           | 0.0            |
| explained_variance | -0.04          |
| fps                | 377            |
| n_updates          | 51             |
| policy_entropy     | 1.4175627      |
| policy_loss        | -0.00028906035 |
| serial_timesteps   | 153000         |
| time_elapsed       | 422            |
| total_timesteps    | 153000         |
| value_loss         | 0.013753798    |
-------------

--------------------------------------
| approxkl           | 0.0030446886  |
| clipfrac           | 0.0           |
| explained_variance | -0.00201      |
| fps                | 357           |
| n_updates          | 67            |
| policy_entropy     | 1.4522601     |
| policy_loss        | -0.0014167291 |
| serial_timesteps   | 201000        |
| time_elapsed       | 552           |
| total_timesteps    | 201000        |
| value_loss         | 0.012087374   |
--------------------------------------
--------------------------------------
| approxkl           | 0.002977847   |
| clipfrac           | 0.018666666   |
| explained_variance | -0.00248      |
| fps                | 379           |
| n_updates          | 68            |
| policy_entropy     | 1.5069815     |
| policy_loss        | -0.0010681911 |
| serial_timesteps   | 204000        |
| time_elapsed       | 561           |
| total_timesteps    | 204000        |
| value_loss         | 0.011422552   |
-------------------------

--------------------------------------
| approxkl           | 0.0014642321  |
| clipfrac           | 0.0           |
| explained_variance | 0.00322       |
| fps                | 373           |
| n_updates          | 84            |
| policy_entropy     | 1.4644384     |
| policy_loss        | -0.0007148174 |
| serial_timesteps   | 252000        |
| time_elapsed       | 690           |
| total_timesteps    | 252000        |
| value_loss         | 0.006570088   |
--------------------------------------
--------------------------------------
| approxkl           | 0.004366501   |
| clipfrac           | 0.0315        |
| explained_variance | -0.0273       |
| fps                | 354           |
| n_updates          | 85            |
| policy_entropy     | 1.4026688     |
| policy_loss        | -0.0010627045 |
| serial_timesteps   | 255000        |
| time_elapsed       | 698           |
| total_timesteps    | 255000        |
| value_loss         | 0.007253631   |
-------------------------

---------------------------------------
| approxkl           | 0.0014557499   |
| clipfrac           | 0.0            |
| explained_variance | -0.0502        |
| fps                | 318            |
| n_updates          | 101            |
| policy_entropy     | 1.5644686      |
| policy_loss        | -0.00051515317 |
| serial_timesteps   | 303000         |
| time_elapsed       | 834            |
| total_timesteps    | 303000         |
| value_loss         | 0.0054544904   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0026219252  |
| clipfrac           | 0.015666667   |
| explained_variance | -0.00673      |
| fps                | 335           |
| n_updates          | 102           |
| policy_entropy     | 1.556096      |
| policy_loss        | -0.0037194057 |
| serial_timesteps   | 306000        |
| time_elapsed       | 844           |
| total_timesteps    | 306000        |
| value_loss         | 0.0062349397  |
------------

-------------------------------------
| approxkl           | 0.0019287885 |
| clipfrac           | 0.016166667  |
| explained_variance | -0.0467      |
| fps                | 367          |
| n_updates          | 118          |
| policy_entropy     | 1.5301725    |
| policy_loss        | -0.00213632  |
| serial_timesteps   | 354000       |
| time_elapsed       | 981          |
| total_timesteps    | 354000       |
| value_loss         | 0.009267058  |
-------------------------------------
-------------------------------------
| approxkl           | 0.004249145  |
| clipfrac           | 0.015333333  |
| explained_variance | -0.0379      |
| fps                | 368          |
| n_updates          | 119          |
| policy_entropy     | 1.5197163    |
| policy_loss        | -0.004272666 |
| serial_timesteps   | 357000       |
| time_elapsed       | 989          |
| total_timesteps    | 357000       |
| value_loss         | 0.009735808  |
-------------------------------------
------------

---------------------------------------
| approxkl           | 0.003230913    |
| clipfrac           | 0.0            |
| explained_variance | 0.0156         |
| fps                | 369            |
| n_updates          | 135            |
| policy_entropy     | 1.4455458      |
| policy_loss        | -0.00089358946 |
| serial_timesteps   | 405000         |
| time_elapsed       | 1.12e+03       |
| total_timesteps    | 405000         |
| value_loss         | 0.007861909    |
---------------------------------------
---------------------------------------
| approxkl           | 0.0018953962   |
| clipfrac           | 0.041416667    |
| explained_variance | 0.00284        |
| fps                | 360            |
| n_updates          | 136            |
| policy_entropy     | 1.475999       |
| policy_loss        | -0.00097035454 |
| serial_timesteps   | 408000         |
| time_elapsed       | 1.13e+03       |
| total_timesteps    | 408000         |
| value_loss         | 0.008280819    |


---------------------------------------
| approxkl           | 0.0027022064   |
| clipfrac           | 0.027333334    |
| explained_variance | 0.00188        |
| fps                | 371            |
| n_updates          | 152            |
| policy_entropy     | 1.1196042      |
| policy_loss        | -0.00078585086 |
| serial_timesteps   | 456000         |
| time_elapsed       | 1.26e+03       |
| total_timesteps    | 456000         |
| value_loss         | 0.008403426    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0013289773  |
| clipfrac           | 0.0           |
| explained_variance | 0.0104        |
| fps                | 377           |
| n_updates          | 153           |
| policy_entropy     | 1.048556      |
| policy_loss        | -0.0012482484 |
| serial_timesteps   | 459000        |
| time_elapsed       | 1.27e+03      |
| total_timesteps    | 459000        |
| value_loss         | 0.009150036   |
------------



Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 2017.9129946231842
Training models/lesson2e


Loading a model without an environment, this model cannot be trained until it has a valid environment.




--------------------------------------
| approxkl           | 1.1387585e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0824       |
| fps                | 347           |
| n_updates          | 1             |
| policy_entropy     | 1.7916946     |
| policy_loss        | -0.0002586567 |
| serial_timesteps   | 3000          |
| time_elapsed       | 1.91e-06      |
| total_timesteps    | 3000          |
| value_loss         | 1.0291901     |
--------------------------------------
--------------------------------------
| approxkl           | 4.1571053e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.107         |
| fps                | 355           |
| n_updates          | 2             |
| policy_entropy     | 1.7915008     |
| policy_loss        | -0.0007029959 |
| serial_timesteps   | 6000          |
| time_elapsed       | 8.64          |
| total_timesteps    | 6000          |
| value_loss         | 0.311091      |
-------------------------

--------------------------------------
| approxkl           | 0.0019629144  |
| clipfrac           | 0.0032500003  |
| explained_variance | 0.391         |
| fps                | 340           |
| n_updates          | 14            |
| policy_entropy     | 1.4311659     |
| policy_loss        | -0.0038588012 |
| serial_timesteps   | 42000         |
| time_elapsed       | 110           |
| total_timesteps    | 42000         |
| value_loss         | 0.015633937   |
--------------------------------------
--------------------------------------
| approxkl           | 0.008318486   |
| clipfrac           | 0.09033333    |
| explained_variance | 0.19          |
| fps                | 361           |
| n_updates          | 15            |
| policy_entropy     | 1.4630485     |
| policy_loss        | -0.0020164389 |
| serial_timesteps   | 45000         |
| time_elapsed       | 119           |
| total_timesteps    | 45000         |
| value_loss         | 0.014692108   |
-------------------------

--------------------------------------
| approxkl           | 0.0003856137  |
| clipfrac           | 0.0           |
| explained_variance | 0.113         |
| fps                | 358           |
| n_updates          | 31            |
| policy_entropy     | 1.3300177     |
| policy_loss        | -6.725255e-06 |
| serial_timesteps   | 93000         |
| time_elapsed       | 254           |
| total_timesteps    | 93000         |
| value_loss         | 0.015975669   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0029983134  |
| clipfrac           | 0.0014166667  |
| explained_variance | 0.0176        |
| fps                | 340           |
| n_updates          | 32            |
| policy_entropy     | 1.3112472     |
| policy_loss        | -0.0011098953 |
| serial_timesteps   | 96000         |
| time_elapsed       | 263           |
| total_timesteps    | 96000         |
| value_loss         | 0.014163317   |
-------------------------

--------------------------------------
| approxkl           | 0.005151097   |
| clipfrac           | 0.060833335   |
| explained_variance | 0.059         |
| fps                | 361           |
| n_updates          | 48            |
| policy_entropy     | 1.1035459     |
| policy_loss        | -0.0025079518 |
| serial_timesteps   | 144000        |
| time_elapsed       | 400           |
| total_timesteps    | 144000        |
| value_loss         | 0.023352487   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0015965386   |
| clipfrac           | 0.030749999    |
| explained_variance | 0.105          |
| fps                | 365            |
| n_updates          | 49             |
| policy_entropy     | 1.1614678      |
| policy_loss        | -0.00078490784 |
| serial_timesteps   | 147000         |
| time_elapsed       | 409            |
| total_timesteps    | 147000         |
| value_loss         | 0.0052901544   |
-------------

---------------------------------------
| approxkl           | 0.0006867716   |
| clipfrac           | 0.00083333335  |
| explained_variance | 0.0359         |
| fps                | 369            |
| n_updates          | 65             |
| policy_entropy     | 1.1540376      |
| policy_loss        | -0.00029916922 |
| serial_timesteps   | 195000         |
| time_elapsed       | 543            |
| total_timesteps    | 195000         |
| value_loss         | 0.008230025    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0038926469  |
| clipfrac           | 0.03475       |
| explained_variance | -0.0476       |
| fps                | 362           |
| n_updates          | 66            |
| policy_entropy     | 1.1939733     |
| policy_loss        | -0.0024282662 |
| serial_timesteps   | 198000        |
| time_elapsed       | 551           |
| total_timesteps    | 198000        |
| value_loss         | 0.014897183   |
------------

--------------------------------------
| approxkl           | 0.00057646574 |
| clipfrac           | 0.0           |
| explained_variance | 0.0394        |
| fps                | 342           |
| n_updates          | 82            |
| policy_entropy     | 1.032691      |
| policy_loss        | -0.0005887632 |
| serial_timesteps   | 246000        |
| time_elapsed       | 685           |
| total_timesteps    | 246000        |
| value_loss         | 0.0066867676  |
--------------------------------------
--------------------------------------
| approxkl           | 0.005736299   |
| clipfrac           | 0.11916666    |
| explained_variance | 0.058         |
| fps                | 369           |
| n_updates          | 83            |
| policy_entropy     | 1.1052139     |
| policy_loss        | -0.0016366711 |
| serial_timesteps   | 249000        |
| time_elapsed       | 694           |
| total_timesteps    | 249000        |
| value_loss         | 0.00407021    |
-------------------------

--------------------------------------
| approxkl           | 0.0020681224  |
| clipfrac           | 0.007         |
| explained_variance | -0.00959      |
| fps                | 365           |
| n_updates          | 99            |
| policy_entropy     | 1.0421141     |
| policy_loss        | -0.0009393185 |
| serial_timesteps   | 297000        |
| time_elapsed       | 831           |
| total_timesteps    | 297000        |
| value_loss         | 0.0086734835  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0021660994  |
| clipfrac           | 0.033083335   |
| explained_variance | 0.0452        |
| fps                | 347           |
| n_updates          | 100           |
| policy_entropy     | 0.9540777     |
| policy_loss        | -0.0012501766 |
| serial_timesteps   | 300000        |
| time_elapsed       | 839           |
| total_timesteps    | 300000        |
| value_loss         | 0.0069467947  |
-------------------------

--------------------------------------
| approxkl           | 0.0014903992  |
| clipfrac           | 0.009666666   |
| explained_variance | -0.0002       |
| fps                | 365           |
| n_updates          | 116           |
| policy_entropy     | 0.9498419     |
| policy_loss        | 0.00015179174 |
| serial_timesteps   | 348000        |
| time_elapsed       | 972           |
| total_timesteps    | 348000        |
| value_loss         | 0.008157806   |
--------------------------------------
---------------------------------------
| approxkl           | 0.00072017516  |
| clipfrac           | 0.0035833332   |
| explained_variance | 0.0137         |
| fps                | 360            |
| n_updates          | 117            |
| policy_entropy     | 0.92214346     |
| policy_loss        | -0.00031436776 |
| serial_timesteps   | 351000         |
| time_elapsed       | 981            |
| total_timesteps    | 351000         |
| value_loss         | 0.011849216    |
-------------

-------------------------------------
| approxkl           | 0.0020739154 |
| clipfrac           | 0.027416667  |
| explained_variance | 0.00364      |
| fps                | 349          |
| n_updates          | 133          |
| policy_entropy     | 0.83209795   |
| policy_loss        | -0.001450794 |
| serial_timesteps   | 399000       |
| time_elapsed       | 1.11e+03     |
| total_timesteps    | 399000       |
| value_loss         | 0.008267305  |
-------------------------------------
---------------------------------------
| approxkl           | 0.0027164305   |
| clipfrac           | 0.019666668    |
| explained_variance | -0.0167        |
| fps                | 364            |
| n_updates          | 134            |
| policy_entropy     | 0.89575857     |
| policy_loss        | -0.00024098424 |
| serial_timesteps   | 402000         |
| time_elapsed       | 1.12e+03       |
| total_timesteps    | 402000         |
| value_loss         | 0.010506411    |
--------------------------

--------------------------------------
| approxkl           | 0.0021714026  |
| clipfrac           | 0.019833334   |
| explained_variance | 0.00702       |
| fps                | 372           |
| n_updates          | 150           |
| policy_entropy     | 0.9332699     |
| policy_loss        | -0.0008013965 |
| serial_timesteps   | 450000        |
| time_elapsed       | 1.25e+03      |
| total_timesteps    | 450000        |
| value_loss         | 0.0111156795  |
--------------------------------------
--------------------------------------
| approxkl           | 0.001102859   |
| clipfrac           | 0.0           |
| explained_variance | 0.00958       |
| fps                | 348           |
| n_updates          | 151           |
| policy_entropy     | 0.8598305     |
| policy_loss        | -0.0010172639 |
| serial_timesteps   | 453000        |
| time_elapsed       | 1.26e+03      |
| total_timesteps    | 453000        |
| value_loss         | 0.011772973   |
-------------------------

Loading a model without an environment, this model cannot be trained until it has a valid environment.


Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Win  0 / 20  games
Tie  20 / 20  games
Lose  0 / 20  games
1 epoch training time: 2032.2098007202148
Training models/lesson2e


Loading a model without an environment, this model cannot be trained until it has a valid environment.




---------------------------------------
| approxkl           | 1.3296891e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.177         |
| fps                | 348            |
| n_updates          | 1              |
| policy_entropy     | 1.7916985      |
| policy_loss        | -0.00027051865 |
| serial_timesteps   | 3000           |
| time_elapsed       | 2.38e-06       |
| total_timesteps    | 3000           |
| value_loss         | 0.393276       |
---------------------------------------
--------------------------------------
| approxkl           | 4.6557187e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0567       |
| fps                | 362           |
| n_updates          | 2             |
| policy_entropy     | 1.7914622     |
| policy_loss        | -0.0005166198 |
| serial_timesteps   | 6000          |
| time_elapsed       | 8.62          |
| total_timesteps    | 6000          |
| value_loss         | 0.15003952    |
------------

--------------------------------------
| approxkl           | 0.0018494952  |
| clipfrac           | 0.015666666   |
| explained_variance | 0.137         |
| fps                | 346           |
| n_updates          | 15            |
| policy_entropy     | 1.5031799     |
| policy_loss        | -0.0009204735 |
| serial_timesteps   | 45000         |
| time_elapsed       | 116           |
| total_timesteps    | 45000         |
| value_loss         | 0.012505013   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0017857346  |
| clipfrac           | 0.015333334   |
| explained_variance | 0.196         |
| fps                | 365           |
| n_updates          | 16            |
| policy_entropy     | 1.4867551     |
| policy_loss        | -0.0016595242 |
| serial_timesteps   | 48000         |
| time_elapsed       | 125           |
| total_timesteps    | 48000         |
| value_loss         | 0.003308793   |
-------------------------

--------------------------------------
| approxkl           | 0.003684245   |
| clipfrac           | 0.0           |
| explained_variance | -0.00941      |
| fps                | 369           |
| n_updates          | 32            |
| policy_entropy     | 1.5907396     |
| policy_loss        | -0.0015609204 |
| serial_timesteps   | 96000         |
| time_elapsed       | 255           |
| total_timesteps    | 96000         |
| value_loss         | 0.015189599   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0034850584  |
| clipfrac           | 0.05883333    |
| explained_variance | 0.0469        |
| fps                | 346           |
| n_updates          | 33            |
| policy_entropy     | 1.5991957     |
| policy_loss        | -0.0028142475 |
| serial_timesteps   | 99000         |
| time_elapsed       | 264           |
| total_timesteps    | 99000         |
| value_loss         | 0.0113011     |
-------------------------

--------------------------------------
| approxkl           | 0.0032358496  |
| clipfrac           | 0.019         |
| explained_variance | 0.00681       |
| fps                | 370           |
| n_updates          | 49            |
| policy_entropy     | 1.4238063     |
| policy_loss        | -0.0022800707 |
| serial_timesteps   | 147000        |
| time_elapsed       | 394           |
| total_timesteps    | 147000        |
| value_loss         | 0.009958512   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0036976584  |
| clipfrac           | 0.009416667   |
| explained_variance | -0.0204       |
| fps                | 377           |
| n_updates          | 50            |
| policy_entropy     | 1.3199263     |
| policy_loss        | -0.0005094009 |
| serial_timesteps   | 150000        |
| time_elapsed       | 403           |
| total_timesteps    | 150000        |
| value_loss         | 0.013192798   |
-------------------------

--------------------------------------
| approxkl           | 0.003590039   |
| clipfrac           | 0.034         |
| explained_variance | 0.0234        |
| fps                | 370           |
| n_updates          | 66            |
| policy_entropy     | 1.419612      |
| policy_loss        | -0.0015242533 |
| serial_timesteps   | 198000        |
| time_elapsed       | 535           |
| total_timesteps    | 198000        |
| value_loss         | 0.018100135   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0026677959 |
| clipfrac           | 0.018916667  |
| explained_variance | -0.0531      |
| fps                | 358          |
| n_updates          | 67           |
| policy_entropy     | 1.4203947    |
| policy_loss        | -0.001995022 |
| serial_timesteps   | 201000       |
| time_elapsed       | 543          |
| total_timesteps    | 201000       |
| value_loss         | 0.0072292723 |
-------------------------------------

--------------------------------------
| approxkl           | 0.004426371   |
| clipfrac           | 0.054333337   |
| explained_variance | -0.0332       |
| fps                | 379           |
| n_updates          | 83            |
| policy_entropy     | 1.2200143     |
| policy_loss        | -0.0023592035 |
| serial_timesteps   | 249000        |
| time_elapsed       | 673           |
| total_timesteps    | 249000        |
| value_loss         | 0.005755998   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0003951204  |
| clipfrac           | 0.0           |
| explained_variance | -0.0165       |
| fps                | 369           |
| n_updates          | 84            |
| policy_entropy     | 1.2356379     |
| policy_loss        | 0.00013398047 |
| serial_timesteps   | 252000        |
| time_elapsed       | 681           |
| total_timesteps    | 252000        |
| value_loss         | 0.005635504   |
-------------------------

-------------------------------------
| approxkl           | 0.0033923513 |
| clipfrac           | 0.019083334  |
| explained_variance | 0.0291       |
| fps                | 362          |
| n_updates          | 100          |
| policy_entropy     | 1.5232036    |
| policy_loss        | -0.003213962 |
| serial_timesteps   | 300000       |
| time_elapsed       | 812          |
| total_timesteps    | 300000       |
| value_loss         | 0.0066425884 |
-------------------------------------
---------------------------------------
| approxkl           | 0.0014346224   |
| clipfrac           | 0.0            |
| explained_variance | -0.00605       |
| fps                | 368            |
| n_updates          | 101            |
| policy_entropy     | 1.5322404      |
| policy_loss        | -2.0782412e-05 |
| serial_timesteps   | 303000         |
| time_elapsed       | 820            |
| total_timesteps    | 303000         |
| value_loss         | 0.0069861496   |
--------------------------

--------------------------------------
| approxkl           | 0.0020899284  |
| clipfrac           | 0.0           |
| explained_variance | 0.0126        |
| fps                | 378           |
| n_updates          | 117           |
| policy_entropy     | 1.4931637     |
| policy_loss        | -0.0016727332 |
| serial_timesteps   | 351000        |
| time_elapsed       | 951           |
| total_timesteps    | 351000        |
| value_loss         | 0.0094449865  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0062326295 |
| clipfrac           | 0.11925      |
| explained_variance | 0.00629      |
| fps                | 349          |
| n_updates          | 118          |
| policy_entropy     | 1.459166     |
| policy_loss        | -0.003179643 |
| serial_timesteps   | 354000       |
| time_elapsed       | 959          |
| total_timesteps    | 354000       |
| value_loss         | 0.012261214  |
-------------------------------------

--------------------------------------
| approxkl           | 0.005220938   |
| clipfrac           | 0.03266667    |
| explained_variance | 0.0167        |
| fps                | 357           |
| n_updates          | 134           |
| policy_entropy     | 1.0395933     |
| policy_loss        | -0.0016484516 |
| serial_timesteps   | 402000        |
| time_elapsed       | 1.09e+03      |
| total_timesteps    | 402000        |
| value_loss         | 0.010079969   |
--------------------------------------
------------------------------------
| approxkl           | 0.005621397 |
| clipfrac           | 0.07808334  |
| explained_variance | -0.00448    |
| fps                | 364         |
| n_updates          | 135         |
| policy_entropy     | 0.891984    |
| policy_loss        | -0.00245237 |
| serial_timesteps   | 405000      |
| time_elapsed       | 1.1e+03     |
| total_timesteps    | 405000      |
| value_loss         | 0.015449117 |
------------------------------------
------------

---------------------------------------
| approxkl           | 0.0010627253   |
| clipfrac           | 0.00066666666  |
| explained_variance | -0.0011        |
| fps                | 370            |
| n_updates          | 151            |
| policy_entropy     | 1.0159934      |
| policy_loss        | -0.00060128106 |
| serial_timesteps   | 453000         |
| time_elapsed       | 1.23e+03       |
| total_timesteps    | 453000         |
| value_loss         | 0.008182365    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0009987903  |
| clipfrac           | 0.020583333   |
| explained_variance | 0.00265       |
| fps                | 353           |
| n_updates          | 152           |
| policy_entropy     | 1.0296205     |
| policy_loss        | -0.0014780193 |
| serial_timesteps   | 456000        |
| time_elapsed       | 1.24e+03      |
| total_timesteps    | 456000        |
| value_loss         | 0.010146543   |
------------



Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished


KeyboardInterrupt: 

In [None]:
#showcase of the best agent, as lame as it is
path = 'curriculum models/lesson2d'
env = env_list[4]
print(env_names[4])

validate(env, path, 10, 2)

In [446]:
history.append((wins,draws, losses))

In [449]:


for w, d, l in history:
    print(w)
    print(d)
    print(l)
    print()
    

print(wins)
print(draws)
print(losses)


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[17, 20, 20, 19, 20, 19, 20, 20, 18, 20, 20, 20, 20, 20, 19, 20, 20, 19]
[3, 0, 0, 1, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 1]


True

In [486]:
history_no_learn = history