In [1]:
# Name of the game to run. It selects the sub-folder under ../../games that is appended to
# sys.path and is embedded in the TensorBoard and model folder names built in the next cell.
# To switch game, uncomment one of the alternatives below (the last assignment wins).
gameFolder = "DOA++-MAME"
#gameFolder = "SFIII-MAME"
#gameFolder = "UMK3-MAME"
#gameFolder = "TEKTAG-MAME"

In [2]:
import sys, os
import time
# Output locations; both names embed the selected game so runs for different games stay apart.
modelFolder = "ppo2_Model_CustCnnLnLstm_" + gameFolder
tensorBoardFolder = "./ppo2_TB_CustCnnLnLstm_" + gameFolder + "/"

# Wall-clock-derived integer seed: the sub-second remainder of the current time, in milliseconds.
# The value differs on every run, so results are not reproducible unless the seed is recorded.
timeDepSeed = int(1000 * (time.time() - int(time.time() - 0.5)))

# Make the selected game's folder importable (it provides makeDiambraEnv, imported in the next cell).
gamePath = os.path.join(os.path.abspath(''), '../../games', gameFolder)
sys.path.append(gamePath)

In [3]:
from makeDiambraEnv import *

import tensorflow as tf

%load_ext tensorboard

from stable_baselines.common.policies import RecurrentActorCriticPolicy
from stable_baselines.common.policies import *
from stable_baselines import PPO2
from stable_baselines.common.evaluation import evaluate_policy

In [4]:
class CustomCnnLstmPolicy(RecurrentActorCriticPolicy):
    """
    Recurrent actor-critic policy with two feature streams feeding one LSTM.

    The first four channels of the processed observation go through ``cnn_extractor``; a slice of
    the fifth channel is flattened and goes through dense layers sized by ``layers``. Both feature
    vectors are concatenated, passed through a single LSTM, and the LSTM output is used as the
    latent for both the policy and the value head.

    :param sess: (TensorFlow session) The current TensorFlow session
    :param ob_space: (Gym Space) The observation space of the environment
    :param ac_space: (Gym Space) The action space of the environment
    :param n_env: (int) The number of environments to run
    :param n_steps: (int) The number of steps to run for each environment
    :param n_batch: (int) The batch size (n_envs * n_steps)
    :param n_lstm: (int) The number of LSTM cells
    :param reuse: (bool) If the policy is reusable or not
    :param layers: ([int]) Sizes of the dense layers applied to the additional-info vector
        (if None, defaults to [64, 64]). An empty list feeds the raw vector to the LSTM.
    :param net_arch: (list) Ignored by this policy; accepted only for signature compatibility.
        A warning is emitted if it is supplied.
    :param act_fun: (tf.func) Activation applied after each additional-info dense layer
    :param cnn_extractor: (function (TensorFlow Tensor, ``**kwargs``): (TensorFlow Tensor)) the CNN feature
        extractor applied to the frame channels
    :param layer_norm: (bool) Whether or not to use layer normalizing LSTMs
    :param feature_extraction: (str) Only controls observation scaling (enabled when "cnn") and the
        kwargs validation; the CNN branch is always built.
    :param chain_layers: (bool) If True, the additional-info dense layers are stacked (each layer
        consumes the previous one). If False (default), every layer is built directly on the
        additional-info vector and only the last one reaches the LSTM; this reproduces the graph
        used by existing checkpoints, whose ``pi_fc{i}`` weight shapes depend on it.
    :param kwargs: (dict) Extra keyword arguments forwarded to ``cnn_extractor``
    """

    recurrent = True

    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, n_lstm=256, reuse=False,
                 layers=None, net_arch=None, act_fun=tf.tanh, cnn_extractor=nature_cnn,
                 layer_norm=True, feature_extraction="cnn", chain_layers=False, **kwargs):

        # state_shape = [n_lstm * 2] dim because of the cell and hidden states of the LSTM
        super(CustomCnnLstmPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                                  state_shape=(2 * n_lstm, ), reuse=reuse,
                                                  scale=(feature_extraction == "cnn"))

        self._kwargs_check(feature_extraction, kwargs)

        if net_arch is not None:
            # net_arch is never read below; tell the caller instead of silently dropping it.
            warnings.warn("CustomCnnLstmPolicy ignores the net_arch parameter. "
                          "Use the layers parameter to size the additional-info layers.")

        # Split the observation along its last axis.
        # NOTE(review): assumes channels 0..3 are the stacked frames and channel 4 packs the
        # additional info (consistent with frame_stack = 4 in the wrapper settings) - confirm
        # against the environment wrapper.
        frames = self.processed_obs[:, :, :, 0:4]
        additional_input = self.processed_obs[:, :, :, 4]
        additional_input = tf.layers.flatten(additional_input)
        # Keep flattened entries 1..148 only.
        # NOTE(review): presumably the span where the wrapper writes the extra info - TODO confirm.
        additional_input = additional_input[:, 1:149]

        if layers is None:
            layers = [64, 64]

        with tf.variable_scope("model", reuse=reuse):

            # Frames (CNN)
            extracted_features_frames = cnn_extractor(frames, **kwargs)

            # Additional info (dense layers). Starting from the raw vector keeps the name defined
            # when `layers` is empty.
            extracted_features_addinfo = additional_input
            for i, layer_size in enumerate(layers):
                # Default (chain_layers=False) keeps every layer rooted at the raw vector so that
                # variable shapes match previously saved checkpoints.
                layer_input = extracted_features_addinfo if chain_layers else additional_input
                extracted_features_addinfo = act_fun(linear(layer_input, 'pi_fc' + str(i),
                                                            n_hidden=layer_size, init_scale=np.sqrt(2)))

            extracted_features = tf.concat([extracted_features_frames, extracted_features_addinfo], 1)

            # Reshape the flat batch into per-step sequences for the LSTM.
            input_sequence = batch_to_seq(extracted_features, self.n_env, n_steps)
            masks = batch_to_seq(self.dones_ph, self.n_env, n_steps)
            rnn_output, self.snew = lstm(input_sequence, masks, self.states_ph, 'lstm1', n_hidden=n_lstm,
                                         layer_norm=layer_norm)
            rnn_output = seq_to_batch(rnn_output)
            value_fn = linear(rnn_output, 'vf', 1)

            # The LSTM output serves as latent for both the policy and the value branch.
            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(rnn_output, rnn_output)

        self._value_fn = value_fn

        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=False):
        """
        Run one forward pass of the policy.

        :param obs: Observation batch fed to ``obs_ph``
        :param state: LSTM state fed to ``states_ph``
        :param mask: Episode-done flags fed to ``dones_ph``
        :param deterministic: (bool) Fetch ``deterministic_action`` instead of ``action``
        :return: list of [actions, flat values, new LSTM states, neglogp] as returned by ``sess.run``
        """
        action_op = self.deterministic_action if deterministic else self.action
        return self.sess.run([action_op, self.value_flat, self.snew, self.neglogp],
                             {self.obs_ph: obs, self.states_ph: state, self.dones_ph: mask})

    def proba_step(self, obs, state=None, mask=None):
        """
        Evaluate ``policy_proba`` for the given observations, LSTM state and done mask.
        """
        return self.sess.run(self.policy_proba, {self.obs_ph: obs, self.states_ph: state, self.dones_ph: mask})

    def value(self, obs, state=None, mask=None):
        """
        Evaluate ``value_flat`` for the given observations, LSTM state and done mask.
        """
        return self.sess.run(self.value_flat, {self.obs_ph: obs, self.states_ph: state, self.dones_ph: mask})


In [9]:
# Keyword arguments passed to make_diambra_env as `diambra_kwargs`.
diambraKwargs = {
    "roms_path": "../../roms/MAMEToolkit/roms/",
    "binary_path": "../../../customMAME/",
    "player": "P1",
    "frame_ratio": 3,
    # Optional entries, currently disabled:
    #   "render": True,
    #   "throttle": False,
    #   "sound": False,
    #   "character": "Random",
    "character": "Kasumi",
}

# Keyword arguments passed to make_diambra_env as `wrapper_kwargs`.
wrapperKwargs = dict(
    frame_stack=4,
    clip_rewards=False,
    normalize_rewards=True,
    scale=True,
    hw_obs_resize=[256, 256],
)

# Keys passed to make_diambra_env as `key_to_add` (use `keyToAdd = None` to pass none).
# Currently disabled keys: "player", "winsP1", "winsP2".
keyToAdd = [
    "actionsBuf",
    "healthP1",
    "healthP2",
    "positionP1",
    "positionP2",
]

numEnv = 1

env = make_diambra_env(
    diambraMame,
    env_prefix="Train",
    num_env=numEnv,
    seed=timeDepSeed,
    continue_game=False,
    diambra_kwargs=diambraKwargs,
    wrapper_kwargs=wrapperKwargs,
    key_to_add=keyToAdd,
)

# Alternative setup kept for reference (two environments, fixed seed, explicit frame stacking):
#env = make_diambra_env(diambraMame, num_env=2, seed=0, diambra_kwargs = diambraKwargs)
#env = VecFrameStack(env, n_stack=4)

Env_id =  Train0
Continue rule =  False
Player = P1 , Character = Kasumi
Noop action N =  11


In [None]:
# Report the environment's observation space, its dtype and its bounds.
obs_space = env.observation_space
print("Obs_space = ", obs_space)
print("Obs_space type = ", obs_space.dtype)
print("Obs_space high = ", obs_space.high)
print("Obs_space low = ", obs_space.low)

In [None]:
# Report the environment's action space, its dtype and its `n` attribute.
act_space = env.action_space
print("Act_space = ", act_space)
print("Act_space type = ", act_space.dtype)
print("Act_space n = ", act_space.n)

In [10]:
# Sizes of the additional-info dense layers of CustomCnnLstmPolicy.
policyKwargs = {"layers": [128, 256]}

# Option 1 - initialize a fresh model, 1 env:
#model = PPO2(CustomCnnLstmPolicy, env, nminibatches=1, verbose=1,
#             tensorboard_log=tensorBoardFolder, policy_kwargs=policyKwargs, gamma = 0.9)

# Option 2 (active) - load the trained agent, 1 env.
# The checkpoint name is modelFolder with the suffix appended directly (no path separator).
model = PPO2.load(
    modelFolder + "2Msteps_action+_add",
    env=env,
    tensorboard_log=tensorBoardFolder,
    policy_kwargs=policyKwargs,
    gamma=0.9,
)

























Instructions for updating:
Use keras.layers.flatten instead.


Instructions for updating:
Use keras.layers.flatten instead.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where














In [None]:
# Run learning on the loaded model for ten million timesteps.
time_steps = 10 ** 7
model.learn(total_timesteps=time_steps)







Setting difficulty = 3
Starting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 4.0789753e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0119       |
| fps                | 2             |
| n_updates          | 1             |
| policy_entropy     | 1.6001147     |
| policy_loss        | -0.0013653897 |
| serial_timesteps   | 128           |
| time_elapsed       | 1.88e-05      |
| total_timesteps    | 128           |
| value_loss         | 0.003962788   |
--------------------------------------
---------------------------------------
| approxkl           | 1.25722145e-05 |
| clipfrac           | 0.0            |
| explained_variance | 0.0217         |
| fps                | 24             |
| n_updates          | 2              |
| policy_entropy     | 1.9708548      |
| policy_loss        | -0.0006090328  |
| serial_timesteps   | 256            |
| time_elapsed       | 44.3           |
| total_time

--------------------------------------
| approxkl           | 1.4027405e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.00131       |
| fps                | 24            |
| n_updates          | 17            |
| policy_entropy     | 2.1540644     |
| policy_loss        | -0.0002354905 |
| serial_timesteps   | 2176          |
| time_elapsed       | 164           |
| total_timesteps    | 2176          |
| value_loss         | 0.002680947   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 8.313155e-07  |
| clipfrac           | 0.0           |
| explained_variance | -0.000547     |
| fps                | 12            |
| n_updates          | 18            |
| policy_entropy     | 2.1564045     |
| policy_loss        | -9.794533e-05 |
| serial_timesteps   | 2304          |
| time_elapsed       | 169           |
| total_timesteps    | 2304          |
| value_loss         | 0.010908103   |
--------------

Round done
---------------------------------------
| approxkl           | 1.3765451e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.99e+03       |
| ep_reward_mean     | 6.72           |
| explained_variance | -0.0766        |
| fps                | 11             |
| n_updates          | 33             |
| policy_entropy     | 1.9954724      |
| policy_loss        | -1.5996397e-05 |
| serial_timesteps   | 4224           |
| time_elapsed       | 291            |
| total_timesteps    | 4224           |
| value_loss         | 0.003941361    |
---------------------------------------
---------------------------------------
| approxkl           | 8.440321e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.99e+03       |
| ep_reward_mean     | 6.72           |
| explained_variance | -0.00188       |
| fps                | 24             |
| n_updates          | 34             |
| policy_entropy     | 2.060729       |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.0455842e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.92e+03      |
| ep_reward_mean     | 5.35          |
| explained_variance | 0.0858        |
| fps                | 24            |
| n_updates          | 47            |
| policy_entropy     | 1.9043531     |
| policy_loss        | -8.40947e-05  |
| serial_timesteps   | 6016          |
| time_elapsed       | 409           |
| total_timesteps    | 6016          |
| value_loss         | 0.0065526366  |
--------------------------------------
---------------------------------------
| approxkl           | 2.6895104e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.92e+03       |
| ep_reward_mean     | 5.35           |
| explained_variance | 0.0165         |
| fps                | 24             |
| n_updates          | 48             |
| policy_entropy     | 2.0478058      |
| policy_loss        | -0.00016534887 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 6.715801e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.92e+03       |
| ep_reward_mean     | 5.35           |
| explained_variance | 0.000551       |
| fps                | 11             |
| n_updates          | 61             |
| policy_entropy     | 2.1927037      |
| policy_loss        | -0.00019744411 |
| serial_timesteps   | 7808           |
| time_elapsed       | 511            |
| total_timesteps    | 7808           |
| value_loss         | 0.006978983    |
---------------------------------------
--------------------------------------
| approxkl           | 1.5578835e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.92e+03      |
| ep_reward_mean     | 5.35          |
| explained_variance | 0.00182       |
| fps                | 24            |
| n_updates          | 62            |
| policy_entropy     | 2.1966536     |
| policy_loss        | -0.0001629442 |

--------------------------------------
| approxkl           | 2.45449e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 5.37          |
| explained_variance | -0.00791      |
| fps                | 24            |
| n_updates          | 75            |
| policy_entropy     | 2.1830637     |
| policy_loss        | -8.676434e-05 |
| serial_timesteps   | 9600          |
| time_elapsed       | 630           |
| total_timesteps    | 9600          |
| value_loss         | 0.005367432   |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.3455966e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.01e+03       |
| ep_reward_mean     | 5.37           |
| explained_variance | -0.00495       |
| fps                | 9              |
| n_updates          | 76             |
| policy_entropy     | 2.1906133      |
| policy_loss        | -8.7898225e-05 |
| se

---------------------------------------
| approxkl           | 1.0362461e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.01e+03       |
| ep_reward_mean     | 5.37           |
| explained_variance | 0.000273       |
| fps                | 24             |
| n_updates          | 89             |
| policy_entropy     | 2.190981       |
| policy_loss        | -0.00013422593 |
| serial_timesteps   | 11392          |
| time_elapsed       | 738            |
| total_timesteps    | 11392          |
| value_loss         | 0.007288993    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.1583396e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 5.37          |
| explained_variance | -0.000986     |
| fps                | 10            |
| n_updates          | 90            |
| policy_entropy     | 2.1955266     |
| policy_loss        | 0.00023790076 |

Stage done
--------------------------------------
| approxkl           | 3.3678912e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 5.47          |
| explained_variance | 0.00315       |
| fps                | 9             |
| n_updates          | 103           |
| policy_entropy     | 2.1875544     |
| policy_loss        | -7.397309e-05 |
| serial_timesteps   | 13184         |
| time_elapsed       | 849           |
| total_timesteps    | 13184         |
| value_loss         | 0.0058504445  |
--------------------------------------
---------------------------------------
| approxkl           | 2.94556e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 5.47           |
| explained_variance | -0.0104        |
| fps                | 24             |
| n_updates          | 104            |
| policy_entropy     | 2.1909087      |
| policy_loss        | -0.00034618378 |
| se

Round done
---------------------------------------
| approxkl           | 2.4463723e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 5.47           |
| explained_variance | 0.00291        |
| fps                | 10             |
| n_updates          | 117            |
| policy_entropy     | 2.2052982      |
| policy_loss        | 0.000115614384 |
| serial_timesteps   | 14976          |
| time_elapsed       | 958            |
| total_timesteps    | 14976          |
| value_loss         | 0.0047681294   |
---------------------------------------
---------------------------------------
| approxkl           | 6.9367155e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 5.47           |
| explained_variance | -0.000433      |
| fps                | 24             |
| n_updates          | 118            |
| policy_entropy     | 2.2058074      |
| policy_loss        | -9.405

-------------------------------------
| approxkl           | 4.837601e-05 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.31e+03     |
| ep_reward_mean     | 5.32         |
| explained_variance | 0.00223      |
| fps                | 24           |
| n_updates          | 131          |
| policy_entropy     | 2.0560107    |
| policy_loss        | 0.0001719892 |
| serial_timesteps   | 16768        |
| time_elapsed       | 1.08e+03     |
| total_timesteps    | 16768        |
| value_loss         | 0.004695866  |
-------------------------------------
---------------------------------------
| approxkl           | 2.8943355e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.31e+03       |
| ep_reward_mean     | 5.32           |
| explained_variance | -0.0199        |
| fps                | 24             |
| n_updates          | 132            |
| policy_entropy     | 2.1508827      |
| policy_loss        | -6.9230795e-05 |
| serial_timesteps   | 16896  

Stage done
--------------------------------------
| approxkl           | 1.0645426e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.31e+03      |
| ep_reward_mean     | 5.32          |
| explained_variance | 0.00194       |
| fps                | 9             |
| n_updates          | 145           |
| policy_entropy     | 2.2089365     |
| policy_loss        | 0.00020359457 |
| serial_timesteps   | 18560         |
| time_elapsed       | 1.19e+03      |
| total_timesteps    | 18560         |
| value_loss         | 0.0016294541  |
--------------------------------------
---------------------------------------
| approxkl           | 8.29294e-07    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.31e+03       |
| ep_reward_mean     | 5.32           |
| explained_variance | -0.00365       |
| fps                | 24             |
| n_updates          | 146            |
| policy_entropy     | 2.209992       |
| policy_loss        | -0.00014257617 |
| se

---------------------------------------
| approxkl           | 2.7019864e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.3e+03        |
| ep_reward_mean     | 5.7            |
| explained_variance | -0.00204       |
| fps                | 23             |
| n_updates          | 159            |
| policy_entropy     | 2.211505       |
| policy_loss        | -0.00026165694 |
| serial_timesteps   | 20352          |
| time_elapsed       | 1.3e+03        |
| total_timesteps    | 20352          |
| value_loss         | 0.009867623    |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 9.279544e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.92e+03      |
| ep_reward_mean     | 4.68          |
| explained_variance | 0.108         |
| fps                | 8             |
| n_updates          | 160           |
| policy_entrop

---------------------------------------
| approxkl           | 9.108693e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.92e+03       |
| ep_reward_mean     | 4.68           |
| explained_variance | -0.00136       |
| fps                | 24             |
| n_updates          | 173            |
| policy_entropy     | 2.1863484      |
| policy_loss        | -2.6784837e-05 |
| serial_timesteps   | 22144          |
| time_elapsed       | 1.41e+03       |
| total_timesteps    | 22144          |
| value_loss         | 0.007343568    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.3347549e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.92e+03       |
| ep_reward_mean     | 4.68           |
| explained_variance | 0.00581        |
| fps                | 11             |
| n_updates          | 174            |
| policy_entropy     | 2.1855025      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.5268769e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.92e+03      |
| ep_reward_mean     | 4.68          |
| explained_variance | 0.00122       |
| fps                | 23            |
| n_updates          | 187           |
| policy_entropy     | 2.1685958     |
| policy_loss        | -7.41072e-05  |
| serial_timesteps   | 23936         |
| time_elapsed       | 1.52e+03      |
| total_timesteps    | 23936         |
| value_loss         | 0.002341479   |
--------------------------------------
--------------------------------------
| approxkl           | 1.2802766e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.92e+03      |
| ep_reward_mean     | 4.68          |
| explained_variance | 0.000702      |
| fps                | 24            |
| n_updates          | 188           |
| policy_entropy     | 2.171996      |
| policy_loss        | 4.393235e-05  |
| serial_timesteps   | 24

Round done
---------------------------------------
| approxkl           | 1.249146e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.86           |
| explained_variance | 0.00222        |
| fps                | 11             |
| n_updates          | 201            |
| policy_entropy     | 2.154992       |
| policy_loss        | -0.00019539893 |
| serial_timesteps   | 25728          |
| time_elapsed       | 1.64e+03       |
| total_timesteps    | 25728          |
| value_loss         | 0.0038377224   |
---------------------------------------
---------------------------------------
| approxkl           | 1.7940567e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.86           |
| explained_variance | -0.00054       |
| fps                | 24             |
| n_updates          | 202            |
| policy_entropy     | 2.1562178      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 5.9607385e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.86           |
| explained_variance | 0.00192        |
| fps                | 24             |
| n_updates          | 215            |
| policy_entropy     | 2.131342       |
| policy_loss        | -0.00011615828 |
| serial_timesteps   | 27520          |
| time_elapsed       | 1.75e+03       |
| total_timesteps    | 27520          |
| value_loss         | 0.006997489    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 4.2803669e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.86           |
| explained_variance | -0.00159       |
| fps                | 11             |
| n_updates          | 216            |
| policy_entropy     | 2.1305978      |
| policy_loss        | -5.252

---------------------------------------
| approxkl           | 5.064716e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.11e+03       |
| ep_reward_mean     | 5.21           |
| explained_variance | -0.000901      |
| fps                | 24             |
| n_updates          | 229            |
| policy_entropy     | 2.0956826      |
| policy_loss        | -0.00050329417 |
| serial_timesteps   | 29312          |
| time_elapsed       | 1.86e+03       |
| total_timesteps    | 29312          |
| value_loss         | 0.0018563571   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.1600879e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.11e+03       |
| ep_reward_mean     | 5.21           |
| explained_variance | 0.0042         |
| fps                | 9              |
| n_updates          | 230            |
| policy_entropy     | 2.0906305      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 9.2536993e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.11e+03       |
| ep_reward_mean     | 5.21           |
| explained_variance | 0.000558       |
| fps                | 23             |
| n_updates          | 243            |
| policy_entropy     | 2.0469904      |
| policy_loss        | -3.8580038e-05 |
| serial_timesteps   | 31104          |
| time_elapsed       | 1.97e+03       |
| total_timesteps    | 31104          |
| value_loss         | 0.0050201183   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.3453691e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.11e+03       |
| ep_reward_mean     | 5.21           |
| explained_variance | 0.000511       |
| fps                | 11             |
| n_updates          | 244            |
| policy_entropy     | 2.0479078      |
| policy_loss        | -0.000

Round done
--------------------------------------
| approxkl           | 2.925912e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 5.83          |
| explained_variance | -0.00844      |
| fps                | 11            |
| n_updates          | 257           |
| policy_entropy     | 2.0801768     |
| policy_loss        | -8.122437e-05 |
| serial_timesteps   | 32896         |
| time_elapsed       | 2.08e+03      |
| total_timesteps    | 32896         |
| value_loss         | 0.0055694724  |
--------------------------------------
---------------------------------------
| approxkl           | 2.9025632e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.83           |
| explained_variance | -0.00223       |
| fps                | 23             |
| n_updates          | 258            |
| policy_entropy     | 2.0940275      |
| policy_loss        | -0.00019969698 |
| se

--------------------------------------
| approxkl           | 4.5640496e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 5.83          |
| explained_variance | 0.00327       |
| fps                | 24            |
| n_updates          | 271           |
| policy_entropy     | 2.1549153     |
| policy_loss        | -6.403122e-05 |
| serial_timesteps   | 34688         |
| time_elapsed       | 2.19e+03      |
| total_timesteps    | 34688         |
| value_loss         | 0.008146534   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 2.8608397e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.83           |
| explained_variance | -0.000763      |
| fps                | 11             |
| n_updates          | 272            |
| policy_entropy     | 2.1526852      |
| policy_loss        | -0.00017410144 |
| se

--------------------------------------
| approxkl           | 5.2691157e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.29e+03      |
| ep_reward_mean     | 5.89          |
| explained_variance | 0.00551       |
| fps                | 23            |
| n_updates          | 285           |
| policy_entropy     | 1.9647614     |
| policy_loss        | -6.994046e-05 |
| serial_timesteps   | 36480         |
| time_elapsed       | 2.3e+03       |
| total_timesteps    | 36480         |
| value_loss         | 0.0075423215  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 3.4095237e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.29e+03      |
| ep_reward_mean     | 5.89          |
| explained_variance | -0.0141       |
| fps                | 10            |
| n_updates          | 286           |
| policy_entropy     | 2.0082796     |
| policy_loss        | -0.0002280008 |
| serial_times

---------------------------------------
| approxkl           | 3.3650608e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 5.89           |
| explained_variance | 0.00161        |
| fps                | 23             |
| n_updates          | 299            |
| policy_entropy     | 2.141982       |
| policy_loss        | -3.0767173e-05 |
| serial_timesteps   | 38272          |
| time_elapsed       | 2.41e+03       |
| total_timesteps    | 38272          |
| value_loss         | 0.004757374    |
---------------------------------------
---------------------------------------
| approxkl           | 2.9067355e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 5.89           |
| explained_variance | -0.000879      |
| fps                | 24             |
| n_updates          | 300            |
| policy_entropy     | 2.1452847      |
| policy_loss        | -0.00017559784 |


---------------------------------------
| approxkl           | 1.6491102e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 5.89           |
| explained_variance | -0.000757      |
| fps                | 23             |
| n_updates          | 313            |
| policy_entropy     | 2.0972416      |
| policy_loss        | -0.00011365116 |
| serial_timesteps   | 40064          |
| time_elapsed       | 2.51e+03       |
| total_timesteps    | 40064          |
| value_loss         | 0.0022437624   |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 3.7387906e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.34e+03       |
| ep_reward_mean     | 5.57           |
| explained_variance | 0.109          |
| fps                | 8              |
| n_updates          | 314            |
| polic

---------------------------------------
| approxkl           | 2.0370892e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.34e+03       |
| ep_reward_mean     | 5.57           |
| explained_variance | 0.00506        |
| fps                | 24             |
| n_updates          | 327            |
| policy_entropy     | 2.1424997      |
| policy_loss        | -0.00015129894 |
| serial_timesteps   | 41856          |
| time_elapsed       | 2.63e+03       |
| total_timesteps    | 41856          |
| value_loss         | 0.0048214425   |
---------------------------------------
---------------------------------------
| approxkl           | 2.0494554e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.34e+03       |
| ep_reward_mean     | 5.57           |
| explained_variance | 0.00124        |
| fps                | 24             |
| n_updates          | 328            |
| policy_entropy     | 2.1382852      |
| policy_loss        | -0.00013746321 |


---------------------------------------
| approxkl           | 1.4294143e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 5.59           |
| explained_variance | -0.00186       |
| fps                | 23             |
| n_updates          | 341            |
| policy_entropy     | 2.0875847      |
| policy_loss        | -0.00016571954 |
| serial_timesteps   | 43648          |
| time_elapsed       | 2.73e+03       |
| total_timesteps    | 43648          |
| value_loss         | 0.0048835417   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.1054291e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 5.59           |
| explained_variance | 0.00181        |
| fps                | 9              |
| n_updates          | 342            |
| policy_entropy     | 2.0894358      |
| policy_loss        | -0.000

Round done
--------------------------------------
| approxkl           | 6.3058865e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 5.48          |
| explained_variance | 0.00814       |
| fps                | 11            |
| n_updates          | 355           |
| policy_entropy     | 2.1406422     |
| policy_loss        | -9.524333e-05 |
| serial_timesteps   | 45440         |
| time_elapsed       | 2.84e+03      |
| total_timesteps    | 45440         |
| value_loss         | 0.0015154509  |
--------------------------------------
---------------------------------------
| approxkl           | 4.095442e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.48           |
| explained_variance | -0.0169        |
| fps                | 23             |
| n_updates          | 356            |
| policy_entropy     | 2.177385       |
| policy_loss        | -0.00010323152 |
| se

Round done
---------------------------------------
| approxkl           | 1.706492e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.48           |
| explained_variance | -0.00201       |
| fps                | 10             |
| n_updates          | 369            |
| policy_entropy     | 2.2173686      |
| policy_loss        | -1.1056662e-05 |
| serial_timesteps   | 47232          |
| time_elapsed       | 2.95e+03       |
| total_timesteps    | 47232          |
| value_loss         | 0.0029168124   |
---------------------------------------
---------------------------------------
| approxkl           | 4.5232304e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.48           |
| explained_variance | 0.000867       |
| fps                | 23             |
| n_updates          | 370            |
| policy_entropy     | 2.2164946      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 9.835595e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 5.54          |
| explained_variance | -0.00111      |
| fps                | 23            |
| n_updates          | 383           |
| policy_entropy     | 2.2409706     |
| policy_loss        | -3.203703e-05 |
| serial_timesteps   | 49024         |
| time_elapsed       | 3.06e+03      |
| total_timesteps    | 49024         |
| value_loss         | 0.0043448084  |
--------------------------------------
--------------------------------------
| approxkl           | 1.5289007e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 5.54          |
| explained_variance | 0.000401      |
| fps                | 23            |
| n_updates          | 384           |
| policy_entropy     | 2.2417047     |
| policy_loss        | 2.1059066e-05 |
| serial_timesteps   | 49

---------------------------------------
| approxkl           | 6.0315597e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.17e+03       |
| ep_reward_mean     | 5.54           |
| explained_variance | 0.000687       |
| fps                | 24             |
| n_updates          | 397            |
| policy_entropy     | 2.2219224      |
| policy_loss        | -0.00016108062 |
| serial_timesteps   | 50816          |
| time_elapsed       | 3.17e+03       |
| total_timesteps    | 50816          |
| value_loss         | 0.006978851    |
---------------------------------------
---------------------------------------
| approxkl           | 4.892223e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.17e+03       |
| ep_reward_mean     | 5.54           |
| explained_variance | 0.00134        |
| fps                | 24             |
| n_updates          | 398            |
| policy_entropy     | 2.215074       |
| policy_loss        | -0.00032639923 |


----------------------------------------
| approxkl           | 1.6275538e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.17e+03        |
| ep_reward_mean     | 5.54            |
| explained_variance | -0.000498       |
| fps                | 24              |
| n_updates          | 411             |
| policy_entropy     | 2.2428195       |
| policy_loss        | -0.000110699795 |
| serial_timesteps   | 52608           |
| time_elapsed       | 3.27e+03        |
| total_timesteps    | 52608           |
| value_loss         | 0.0038816594    |
----------------------------------------
--------------------------------------
| approxkl           | 4.6952823e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 5.54          |
| explained_variance | -0.000268     |
| fps                | 24            |
| n_updates          | 412           |
| policy_entropy     | 2.2432165     |
| policy_loss        | 3.9936975e-

Stage done
--------------------------------------
| approxkl           | 4.2651845e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.3e+03       |
| ep_reward_mean     | 5.46          |
| explained_variance | 0.000356      |
| fps                | 9             |
| n_updates          | 425           |
| policy_entropy     | 2.2125158     |
| policy_loss        | -6.776303e-05 |
| serial_timesteps   | 54400         |
| time_elapsed       | 3.38e+03      |
| total_timesteps    | 54400         |
| value_loss         | 0.0068894224  |
--------------------------------------
--------------------------------------
| approxkl           | 1.2187261e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.3e+03       |
| ep_reward_mean     | 5.46          |
| explained_variance | 0.00414       |
| fps                | 23            |
| n_updates          | 426           |
| policy_entropy     | 2.211699      |
| policy_loss        | -0.0002040267 |
| serial_times

Round done
---------------------------------------
| approxkl           | 8.7003525e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 5.3            |
| explained_variance | -0.0071        |
| fps                | 10             |
| n_updates          | 439            |
| policy_entropy     | 2.2429705      |
| policy_loss        | -8.6553395e-05 |
| serial_timesteps   | 56192          |
| time_elapsed       | 3.49e+03       |
| total_timesteps    | 56192          |
| value_loss         | 0.0023769687   |
---------------------------------------
--------------------------------------
| approxkl           | 5.7123884e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 5.3           |
| explained_variance | 0.0026        |
| fps                | 24            |
| n_updates          | 440           |
| policy_entropy     | 2.246283      |
| policy_loss        | -6.19553e-05  |

--------------------------------------
| approxkl           | 6.2103095e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 5.13          |
| explained_variance | -0.00851      |
| fps                | 23            |
| n_updates          | 453           |
| policy_entropy     | 2.2704144     |
| policy_loss        | 1.9397587e-05 |
| serial_timesteps   | 57984         |
| time_elapsed       | 3.61e+03      |
| total_timesteps    | 57984         |
| value_loss         | 0.005832951   |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 8.3780026e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 5.13           |
| explained_variance | 0.00789        |
| fps                | 9              |
| n_updates          | 454            |
| policy_entropy     | 2.2725575      |
| policy_loss        | -0.00013613328 |
| se

---------------------------------------
| approxkl           | 8.56283e-07    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 5.13           |
| explained_variance | 7.51e-05       |
| fps                | 24             |
| n_updates          | 467            |
| policy_entropy     | 2.2726147      |
| policy_loss        | -0.00014078617 |
| serial_timesteps   | 59776          |
| time_elapsed       | 3.72e+03       |
| total_timesteps    | 59776          |
| value_loss         | 0.0051721055   |
---------------------------------------
--------------------------------------
| approxkl           | 1.4745506e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 5.13          |
| explained_variance | -0.0013       |
| fps                | 23            |
| n_updates          | 468           |
| policy_entropy     | 2.270392      |
| policy_loss        | 2.8006732e-05 |
| serial_t

---------------------------------------
| approxkl           | 3.899054e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 5.13           |
| explained_variance | -0.00133       |
| fps                | 23             |
| n_updates          | 481            |
| policy_entropy     | 2.2669082      |
| policy_loss        | -0.00034518074 |
| serial_timesteps   | 61568          |
| time_elapsed       | 3.82e+03       |
| total_timesteps    | 61568          |
| value_loss         | 0.0018246255   |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 4.396962e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 5.19          |
| explained_variance | 0.168         |
| fps                | 8             |
| n_updates          | 482           |
| policy_entrop

---------------------------------------
| approxkl           | 1.5612876e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 5.19           |
| explained_variance | -0.00223       |
| fps                | 24             |
| n_updates          | 495            |
| policy_entropy     | 2.2837112      |
| policy_loss        | -0.00024340581 |
| serial_timesteps   | 63360          |
| time_elapsed       | 3.93e+03       |
| total_timesteps    | 63360          |
| value_loss         | 0.0042439904   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 9.701469e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 5.19          |
| explained_variance | 0.0119        |
| fps                | 10            |
| n_updates          | 496           |
| policy_entropy     | 2.2813816     |
| policy_loss        | -4.910305e-05 |

---------------------------------------
| approxkl           | 1.1043289e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 5.22           |
| explained_variance | -0.00151       |
| fps                | 24             |
| n_updates          | 509            |
| policy_entropy     | 2.2882         |
| policy_loss        | -0.00013178214 |
| serial_timesteps   | 65152          |
| time_elapsed       | 4.03e+03       |
| total_timesteps    | 65152          |
| value_loss         | 0.004743828    |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 2.864852e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 5.22          |
| explained_variance | -0.00128      |
| fps                | 11            |
| n_updates          | 510           |
| policy_entropy     | 2.286272      |
| policy_loss        | -0.0001930669 |

Stage done
---------------------------------------
| approxkl           | 2.4149963e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 5.22           |
| explained_variance | -0.00173       |
| fps                | 9              |
| n_updates          | 523            |
| policy_entropy     | 2.2885864      |
| policy_loss        | -0.00024347007 |
| serial_timesteps   | 66944          |
| time_elapsed       | 4.14e+03       |
| total_timesteps    | 66944          |
| value_loss         | 0.008038282    |
---------------------------------------
---------------------------------------
| approxkl           | 1.5034259e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 5.22           |
| explained_variance | 0.000309       |
| fps                | 24             |
| n_updates          | 524            |
| policy_entropy     | 2.2898965      |
| policy_loss        | -9.355

---------------------------------------
| approxkl           | 3.214742e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.4            |
| explained_variance | -0.00202       |
| fps                | 24             |
| n_updates          | 537            |
| policy_entropy     | 2.2732565      |
| policy_loss        | -9.8461285e-05 |
| serial_timesteps   | 68736          |
| time_elapsed       | 4.26e+03       |
| total_timesteps    | 68736          |
| value_loss         | 0.002768631    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 5.246518e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.4            |
| explained_variance | -0.00033       |
| fps                | 12             |
| n_updates          | 538            |
| policy_entropy     | 2.273832       |
| policy_loss        | -4.611

--------------------------------------
| approxkl           | 2.1320889e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 5.4           |
| explained_variance | -0.000411     |
| fps                | 23            |
| n_updates          | 551           |
| policy_entropy     | 2.2814832     |
| policy_loss        | -0.0002159588 |
| serial_timesteps   | 70528         |
| time_elapsed       | 4.37e+03      |
| total_timesteps    | 70528         |
| value_loss         | 0.0056218607  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 2.0307098e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.4            |
| explained_variance | 8.99e-05       |
| fps                | 11             |
| n_updates          | 552            |
| policy_entropy     | 2.2788992      |
| policy_loss        | -5.8444217e-05 |
| se

---------------------------------------
| approxkl           | 3.0338206e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 5.4            |
| explained_variance | 0.00011        |
| fps                | 23             |
| n_updates          | 565            |
| policy_entropy     | 2.2478182      |
| policy_loss        | -5.8233272e-05 |
| serial_timesteps   | 72320          |
| time_elapsed       | 4.48e+03       |
| total_timesteps    | 72320          |
| value_loss         | 0.0027209911   |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 5.071033e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 5.53           |
| explained_variance | 0.0645         |
| fps                | 8              |
| n_updates          | 566            |
| polic

Round done
---------------------------------------
| approxkl           | 1.0978085e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 5.53           |
| explained_variance | 0.00222        |
| fps                | 11             |
| n_updates          | 579            |
| policy_entropy     | 2.2590246      |
| policy_loss        | -0.00013091415 |
| serial_timesteps   | 74112          |
| time_elapsed       | 4.59e+03       |
| total_timesteps    | 74112          |
| value_loss         | 0.008183417    |
---------------------------------------
---------------------------------------
| approxkl           | 1.0184324e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 5.53           |
| explained_variance | -0.000201      |
| fps                | 24             |
| n_updates          | 580            |
| policy_entropy     | 2.2600403      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 3.3206294e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.29e+03      |
| ep_reward_mean     | 5.53          |
| explained_variance | -0.000929     |
| fps                | 24            |
| n_updates          | 593           |
| policy_entropy     | 2.2513022     |
| policy_loss        | -8.555874e-05 |
| serial_timesteps   | 75904         |
| time_elapsed       | 4.71e+03      |
| total_timesteps    | 75904         |
| value_loss         | 0.005003074   |
--------------------------------------
---------------------------------------
| approxkl           | 7.504259e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 5.53           |
| explained_variance | 0.00153        |
| fps                | 24             |
| n_updates          | 594            |
| policy_entropy     | 2.2522945      |
| policy_loss        | -0.00017134473 |
| serial_timest

---------------------------------------
| approxkl           | 6.075581e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.37e+03       |
| ep_reward_mean     | 5.6            |
| explained_variance | 0.0254         |
| fps                | 24             |
| n_updates          | 607            |
| policy_entropy     | 2.2120967      |
| policy_loss        | -5.1330775e-05 |
| serial_timesteps   | 77696          |
| time_elapsed       | 4.82e+03       |
| total_timesteps    | 77696          |
| value_loss         | 0.00240253     |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 1.4127968e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.37e+03      |
| ep_reward_mean     | 5.6           |
| explained_variance | 0.00869       |
| fps                | 11            |
| n_updates          | 608           |
| policy_entropy     | 2.2203417     |
| policy_loss        | -8.811802e-05 |

Stage done
---------------------------------------
| approxkl           | 4.010105e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.37e+03       |
| ep_reward_mean     | 5.6            |
| explained_variance | -5.81e-05      |
| fps                | 9              |
| n_updates          | 621            |
| policy_entropy     | 2.2101507      |
| policy_loss        | -0.00019505527 |
| serial_timesteps   | 79488          |
| time_elapsed       | 4.93e+03       |
| total_timesteps    | 79488          |
| value_loss         | 0.005975012    |
---------------------------------------
---------------------------------------
| approxkl           | 4.0682776e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.37e+03       |
| ep_reward_mean     | 5.6            |
| explained_variance | 0.000944       |
| fps                | 24             |
| n_updates          | 622            |
| policy_entropy     | 2.2070875      |
| policy_loss        | -0.000

Round done
--------------------------------------
| approxkl           | 4.1986136e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.37e+03      |
| ep_reward_mean     | 5.6           |
| explained_variance | -0.00186      |
| fps                | 11            |
| n_updates          | 635           |
| policy_entropy     | 2.1408157     |
| policy_loss        | -7.38129e-05  |
| serial_timesteps   | 81280         |
| time_elapsed       | 5.04e+03      |
| total_timesteps    | 81280         |
| value_loss         | 0.006137093   |
--------------------------------------
--------------------------------------
| approxkl           | 3.1824363e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.37e+03      |
| ep_reward_mean     | 5.6           |
| explained_variance | 0.000686      |
| fps                | 24            |
| n_updates          | 636           |
| policy_entropy     | 2.137181      |
| policy_loss        | -0.0002645105 |
| serial_times

---------------------------------------
| approxkl           | 6.57933e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.41e+03       |
| ep_reward_mean     | 5.79           |
| explained_variance | -5.3e-05       |
| fps                | 24             |
| n_updates          | 649            |
| policy_entropy     | 2.172615       |
| policy_loss        | -0.00029858574 |
| serial_timesteps   | 83072          |
| time_elapsed       | 5.16e+03       |
| total_timesteps    | 83072          |
| value_loss         | 0.0034678434   |
---------------------------------------
---------------------------------------
| approxkl           | 9.831846e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.41e+03       |
| ep_reward_mean     | 5.79           |
| explained_variance | 0.00679        |
| fps                | 24             |
| n_updates          | 650            |
| policy_entropy     | 2.1799026      |
| policy_loss        | -0.00014790148 |


--------------------------------------
| approxkl           | 1.1203907e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.41e+03      |
| ep_reward_mean     | 5.79          |
| explained_variance | -0.00244      |
| fps                | 24            |
| n_updates          | 663           |
| policy_entropy     | 2.2152026     |
| policy_loss        | -0.0001122579 |
| serial_timesteps   | 84864         |
| time_elapsed       | 5.26e+03      |
| total_timesteps    | 84864         |
| value_loss         | 0.0022451454  |
--------------------------------------
--------------------------------------
| approxkl           | 5.4623215e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.41e+03      |
| ep_reward_mean     | 5.79          |
| explained_variance | 0.00168       |
| fps                | 24            |
| n_updates          | 664           |
| policy_entropy     | 2.2130878     |
| policy_loss        | -0.0003832318 |
| serial_timesteps   | 84

Round done
---------------------------------------
| approxkl           | 8.681592e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.4e+03        |
| ep_reward_mean     | 5.78           |
| explained_variance | 0.000704       |
| fps                | 11             |
| n_updates          | 677            |
| policy_entropy     | 2.1550517      |
| policy_loss        | -0.00065953657 |
| serial_timesteps   | 86656          |
| time_elapsed       | 5.37e+03       |
| total_timesteps    | 86656          |
| value_loss         | 0.005626499    |
---------------------------------------
---------------------------------------
| approxkl           | 1.1280194e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.4e+03        |
| ep_reward_mean     | 5.78           |
| explained_variance | 0.00198        |
| fps                | 24             |
| n_updates          | 678            |
| policy_entropy     | 2.1453815      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 6.0624693e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.4e+03       |
| ep_reward_mean     | 5.78          |
| explained_variance | 0.00428       |
| fps                | 24            |
| n_updates          | 691           |
| policy_entropy     | 2.1316514     |
| policy_loss        | -0.0002676025 |
| serial_timesteps   | 88448         |
| time_elapsed       | 5.48e+03      |
| total_timesteps    | 88448         |
| value_loss         | 0.0040497007  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 9.5692085e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.4e+03        |
| ep_reward_mean     | 5.78           |
| explained_variance | 0.000533       |
| fps                | 10             |
| n_updates          | 692            |
| policy_entropy     | 2.1236691      |
| policy_loss        | -0.00043583382 |
| se

---------------------------------------
| approxkl           | 3.4739476e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.32e+03       |
| ep_reward_mean     | 5.47           |
| explained_variance | -0.00147       |
| fps                | 24             |
| n_updates          | 705            |
| policy_entropy     | 2.1881166      |
| policy_loss        | -0.00026802532 |
| serial_timesteps   | 90240          |
| time_elapsed       | 5.59e+03       |
| total_timesteps    | 90240          |
| value_loss         | 0.011514302    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.2778142e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.32e+03       |
| ep_reward_mean     | 5.47           |
| explained_variance | -0.00271       |
| fps                | 9              |
| n_updates          | 706            |
| policy_entropy     | 2.1816716      |
| policy_loss        | -0.000

Round done
---------------------------------------
| approxkl           | 8.247822e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.32e+03       |
| ep_reward_mean     | 5.47           |
| explained_variance | 0.00288        |
| fps                | 11             |
| n_updates          | 719            |
| policy_entropy     | 2.2178955      |
| policy_loss        | -0.00011317665 |
| serial_timesteps   | 92032          |
| time_elapsed       | 5.69e+03       |
| total_timesteps    | 92032          |
| value_loss         | 0.009285424    |
---------------------------------------
----------------------------------------
| approxkl           | 8.884856e-07    |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.32e+03        |
| ep_reward_mean     | 5.47            |
| explained_variance | -0.000302       |
| fps                | 24              |
| n_updates          | 720             |
| policy_entropy     | 2.2171261       |
| policy_loss       

--------------------------------------
| approxkl           | 3.0857016e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.31e+03      |
| ep_reward_mean     | 5.42          |
| explained_variance | 0.00266       |
| fps                | 23            |
| n_updates          | 733           |
| policy_entropy     | 2.1641488     |
| policy_loss        | 1.925975e-06  |
| serial_timesteps   | 93824         |
| time_elapsed       | 5.81e+03      |
| total_timesteps    | 93824         |
| value_loss         | 0.005441781   |
--------------------------------------
--------------------------------------
| approxkl           | 4.7278692e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.31e+03      |
| ep_reward_mean     | 5.42          |
| explained_variance | 0.00263       |
| fps                | 24            |
| n_updates          | 734           |
| policy_entropy     | 2.1622949     |
| policy_loss        | 9.239465e-05  |
| serial_timesteps   | 93

---------------------------------------
| approxkl           | 6.9711414e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.31e+03       |
| ep_reward_mean     | 5.42           |
| explained_variance | 8.38e-05       |
| fps                | 24             |
| n_updates          | 747            |
| policy_entropy     | 2.1536942      |
| policy_loss        | -0.00016167387 |
| serial_timesteps   | 95616          |
| time_elapsed       | 5.92e+03       |
| total_timesteps    | 95616          |
| value_loss         | 0.0092792185   |
---------------------------------------
--------------------------------------
| approxkl           | 2.424473e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.31e+03      |
| ep_reward_mean     | 5.42          |
| explained_variance | -0.000401     |
| fps                | 24            |
| n_updates          | 748           |
| policy_entropy     | 2.1493497     |
| policy_loss        | 6.0535967e-06 |
| serial_t

Stage done
--------------------------------------
| approxkl           | 5.4310412e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.33e+03      |
| ep_reward_mean     | 5.3           |
| explained_variance | -0.0047       |
| fps                | 9             |
| n_updates          | 761           |
| policy_entropy     | 2.1465464     |
| policy_loss        | -0.000498699  |
| serial_timesteps   | 97408         |
| time_elapsed       | 6.02e+03      |
| total_timesteps    | 97408         |
| value_loss         | 0.003990592   |
--------------------------------------
--------------------------------------
| approxkl           | 2.4275636e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.33e+03      |
| ep_reward_mean     | 5.3           |
| explained_variance | -0.000382     |
| fps                | 24            |
| n_updates          | 762           |
| policy_entropy     | 2.1365762     |
| policy_loss        | -0.0011347709 |
| serial_times

----------------------------------------
| approxkl           | 2.936722e-06    |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.33e+03        |
| ep_reward_mean     | 5.3             |
| explained_variance | 0.000688        |
| fps                | 23              |
| n_updates          | 775             |
| policy_entropy     | 2.0729244       |
| policy_loss        | -0.000119931996 |
| serial_timesteps   | 99200           |
| time_elapsed       | 6.14e+03        |
| total_timesteps    | 99200           |
| value_loss         | 0.005892229     |
----------------------------------------
--------------------------------------
| approxkl           | 1.1341725e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.33e+03      |
| ep_reward_mean     | 5.3           |
| explained_variance | -0.000457     |
| fps                | 23            |
| n_updates          | 776           |
| policy_entropy     | 2.0757718     |
| policy_loss        | 3.0715019e-

-------------------------------------
| approxkl           | 0.0034054392 |
| clipfrac           | 0.052734375  |
| ep_len_mean        | 3.24e+03     |
| ep_reward_mean     | 5.07         |
| explained_variance | -0.0183      |
| fps                | 24           |
| n_updates          | 789          |
| policy_entropy     | 1.9892011    |
| policy_loss        | -0.014775332 |
| serial_timesteps   | 100992       |
| time_elapsed       | 6.25e+03     |
| total_timesteps    | 100992       |
| value_loss         | 0.0053811837 |
-------------------------------------
Stage done
-------------------------------------
| approxkl           | 0.008228749  |
| clipfrac           | 0.13085938   |
| ep_len_mean        | 3.24e+03     |
| ep_reward_mean     | 5.07         |
| explained_variance | -0.194       |
| fps                | 9            |
| n_updates          | 790          |
| policy_entropy     | 2.1370654    |
| policy_loss        | -0.020648547 |
| serial_timesteps   | 101120       |
|

--------------------------------------
| approxkl           | 0.00046056235 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 5.05          |
| explained_variance | 0.0056        |
| fps                | 24            |
| n_updates          | 804           |
| policy_entropy     | 1.8799024     |
| policy_loss        | -0.0063194465 |
| serial_timesteps   | 102912        |
| time_elapsed       | 6.37e+03      |
| total_timesteps    | 102912        |
| value_loss         | 0.0075272718  |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 0.00021781492 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 5.05          |
| explained_variance | -0.0481       |
| fps                | 9             |
| n_updates          | 805           |
| policy_entropy     | 1.8279824     |
| policy_loss        | -0.005997669  |
| serial_times

--------------------------------------
| approxkl           | 0.00090003805 |
| clipfrac           | 0.00390625    |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 5.05          |
| explained_variance | 0.00962       |
| fps                | 24            |
| n_updates          | 818           |
| policy_entropy     | 2.1402602     |
| policy_loss        | -0.0038464516 |
| serial_timesteps   | 104704        |
| time_elapsed       | 6.48e+03      |
| total_timesteps    | 104704        |
| value_loss         | 0.008551793   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 0.0003084219  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 5.05          |
| explained_variance | 0.0203        |
| fps                | 10            |
| n_updates          | 819           |
| policy_entropy     | 2.2014928     |
| policy_loss        | -0.0060370825 |
| serial_times

Round done
--------------------------------------
| approxkl           | 2.9612414e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.99          |
| explained_variance | 0.0115        |
| fps                | 11            |
| n_updates          | 833           |
| policy_entropy     | 2.307207      |
| policy_loss        | -0.0005134754 |
| serial_timesteps   | 106624        |
| time_elapsed       | 6.6e+03       |
| total_timesteps    | 106624        |
| value_loss         | 0.005485813   |
--------------------------------------
--------------------------------------
| approxkl           | 5.0462077e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.99          |
| explained_variance | -0.0118       |
| fps                | 23            |
| n_updates          | 834           |
| policy_entropy     | 2.2882895     |
| policy_loss        | -0.001692418  |
| serial_times

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 0.00097454805 |
| clipfrac           | 0.0078125     |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.88          |
| explained_variance | -0.000278     |
| fps                | 8             |
| n_updates          | 847           |
| policy_entropy     | 2.287274      |
| policy_loss        | -0.0073583648 |
| serial_timesteps   | 108416        |
| time_elapsed       | 6.7e+03       |
| total_timesteps    | 108416        |
| value_loss         | 0.011286041   |
--------------------------------------
---------------------------------------
| approxkl           | 0.00024299083  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.88           |
| explained_variance | 0.00298        |
| fps                | 24             |
| n_updates          | 848            |
| policy_entropy     |

--------------------------------------
| approxkl           | 2.06095e-05   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.88          |
| explained_variance | -0.00212      |
| fps                | 24            |
| n_updates          | 861           |
| policy_entropy     | 2.3654523     |
| policy_loss        | -8.873688e-05 |
| serial_timesteps   | 110208        |
| time_elapsed       | 6.82e+03      |
| total_timesteps    | 110208        |
| value_loss         | 0.006991885   |
--------------------------------------
--------------------------------------
| approxkl           | 2.0935266e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.88          |
| explained_variance | -0.0044       |
| fps                | 23            |
| n_updates          | 862           |
| policy_entropy     | 2.372467      |
| policy_loss        | -0.000405211  |
| serial_timesteps   | 11

--------------------------------------
| approxkl           | 0.00010843561 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.78          |
| explained_variance | 0.00724       |
| fps                | 24            |
| n_updates          | 875           |
| policy_entropy     | 2.236906      |
| policy_loss        | 0.0006522089  |
| serial_timesteps   | 112000        |
| time_elapsed       | 6.93e+03      |
| total_timesteps    | 112000        |
| value_loss         | 0.0021766042  |
--------------------------------------
--------------------------------------
| approxkl           | 7.973898e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.78          |
| explained_variance | 0.00168       |
| fps                | 24            |
| n_updates          | 876           |
| policy_entropy     | 2.214272      |
| policy_loss        | -0.0016809627 |
| serial_timesteps   | 11

--------------------------------------
| approxkl           | 3.893442e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.78          |
| explained_variance | 0.00046       |
| fps                | 23            |
| n_updates          | 889           |
| policy_entropy     | 2.2206364     |
| policy_loss        | -8.381903e-05 |
| serial_timesteps   | 113792        |
| time_elapsed       | 7.04e+03      |
| total_timesteps    | 113792        |
| value_loss         | 0.00455708    |
--------------------------------------
--------------------------------------
| approxkl           | 1.2641165e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.78          |
| explained_variance | 0.000886      |
| fps                | 23            |
| n_updates          | 890           |
| policy_entropy     | 2.2321134     |
| policy_loss        | 0.00019626459 |
| serial_timesteps   | 11

Stage done
---------------------------------------
| approxkl           | 5.970538e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.78           |
| explained_variance | 6.93e-05       |
| fps                | 9              |
| n_updates          | 903            |
| policy_entropy     | 2.2564807      |
| policy_loss        | -0.00016472861 |
| serial_timesteps   | 115584         |
| time_elapsed       | 7.14e+03       |
| total_timesteps    | 115584         |
| value_loss         | 0.009969758    |
---------------------------------------
---------------------------------------
| approxkl           | 1.757842e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.78           |
| explained_variance | 0.000164       |
| fps                | 24             |
| n_updates          | 904            |
| policy_entropy     | 2.2535172      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.9444502e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.88          |
| explained_variance | 0.00125       |
| fps                | 24            |
| n_updates          | 917           |
| policy_entropy     | 2.2314832     |
| policy_loss        | -0.0003034491 |
| serial_timesteps   | 117376        |
| time_elapsed       | 7.25e+03      |
| total_timesteps    | 117376        |
| value_loss         | 0.0043599885  |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.1484259e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.88          |
| explained_variance | 0.000644      |
| fps                | 10            |
| n_updates          | 918           |
| policy_entropy     | 2.227828      |
| policy_loss        | -8.936971e-06 |
| serial_times

--------------------------------------
| approxkl           | 0.00039008207 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.84          |
| explained_variance | -0.0666       |
| fps                | 24            |
| n_updates          | 931           |
| policy_entropy     | 2.0271735     |
| policy_loss        | -0.0013374202 |
| serial_timesteps   | 119168        |
| time_elapsed       | 7.36e+03      |
| total_timesteps    | 119168        |
| value_loss         | 0.0023958266  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 0.00032054484  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.84           |
| explained_variance | -0.00171       |
| fps                | 11             |
| n_updates          | 932            |
| policy_entropy     | 2.0211143      |
| policy_loss        | -0.00017865095 |
| se

--------------------------------------
| approxkl           | 4.5161218e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.84          |
| explained_variance | -0.00304      |
| fps                | 23            |
| n_updates          | 945           |
| policy_entropy     | 1.6588811     |
| policy_loss        | -0.0008588899 |
| serial_timesteps   | 120960        |
| time_elapsed       | 7.47e+03      |
| total_timesteps    | 120960        |
| value_loss         | 0.00414099    |
--------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
-------------------------------------
| approxkl           | 0.004281774  |
| clipfrac           | 0.05859375   |
| ep_len_mean        | 3.19e+03     |
| ep_reward_mean     | 4.8          |
| explained_variance | 0.182        |
| fps                | 8            |
| n_updates          | 946          |
| policy_entropy     | 1.6104487    |


---------------------------------------
| approxkl           | 7.547411e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.8            |
| explained_variance | 0.000997       |
| fps                | 24             |
| n_updates          | 959            |
| policy_entropy     | 1.5296063      |
| policy_loss        | -0.00024151243 |
| serial_timesteps   | 122752         |
| time_elapsed       | 7.58e+03       |
| total_timesteps    | 122752         |
| value_loss         | 0.0013771738   |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
-------------------------------------
| approxkl           | 0.0005307642 |
| clipfrac           | 0.001953125  |
| ep_len_mean        | 3.15e+03     |
| ep_reward_mean     | 4.8          |
| explained_variance | 0.292        |
| fps                | 8            |
| n_updates          | 960          |
| policy_entropy     | 

Stage done
--------------------------------------
| approxkl           | 0.0004630039  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.8           |
| explained_variance | 0.00476       |
| fps                | 9             |
| n_updates          | 973           |
| policy_entropy     | 1.5469594     |
| policy_loss        | -0.0018893988 |
| serial_timesteps   | 124544        |
| time_elapsed       | 7.69e+03      |
| total_timesteps    | 124544        |
| value_loss         | 0.0064349864  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00016780707 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.8           |
| explained_variance | 0.00205       |
| fps                | 24            |
| n_updates          | 974           |
| policy_entropy     | 1.4746153     |
| policy_loss        | -0.0019686953 |
| serial_times

--------------------------------------
| approxkl           | 3.8374485e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.8           |
| explained_variance | -0.000122     |
| fps                | 23            |
| n_updates          | 987           |
| policy_entropy     | 1.5073079     |
| policy_loss        | -0.0005263444 |
| serial_timesteps   | 126336        |
| time_elapsed       | 7.8e+03       |
| total_timesteps    | 126336        |
| value_loss         | 0.008064142   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 2.7279593e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.8            |
| explained_variance | 0.00163        |
| fps                | 10             |
| n_updates          | 988            |
| policy_entropy     | 1.4850086      |
| policy_loss        | -0.00020169467 |
| se

Round done
---------------------------------------
| approxkl           | 3.135951e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.17e+03       |
| ep_reward_mean     | 4.88           |
| explained_variance | -0.00472       |
| fps                | 10             |
| n_updates          | 1001           |
| policy_entropy     | 1.3712909      |
| policy_loss        | -0.00032148324 |
| serial_timesteps   | 128128         |
| time_elapsed       | 7.91e+03       |
| total_timesteps    | 128128         |
| value_loss         | 0.004839557    |
---------------------------------------
--------------------------------------
| approxkl           | 7.19934e-05   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.88          |
| explained_variance | 0.00376       |
| fps                | 23            |
| n_updates          | 1002          |
| policy_entropy     | 1.4082036     |
| policy_loss        | -0.0005752146 |

---------------------------------------
| approxkl           | 9.770823e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.17e+03       |
| ep_reward_mean     | 4.88           |
| explained_variance | -0.0044        |
| fps                | 24             |
| n_updates          | 1015           |
| policy_entropy     | 1.3703299      |
| policy_loss        | -0.00018829107 |
| serial_timesteps   | 129920         |
| time_elapsed       | 8.02e+03       |
| total_timesteps    | 129920         |
| value_loss         | 0.0064698365   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 1.4032935e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.88          |
| explained_variance | 0.00537       |
| fps                | 11            |
| n_updates          | 1016          |
| policy_entropy     | 1.3642418     |
| policy_loss        | -0.0005637817 |

Round done
--------------------------------------
| approxkl           | 7.935621e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.82          |
| explained_variance | 0.00122       |
| fps                | 11            |
| n_updates          | 1029          |
| policy_entropy     | 1.4809542     |
| policy_loss        | 0.00018274412 |
| serial_timesteps   | 131712        |
| time_elapsed       | 8.13e+03      |
| total_timesteps    | 131712        |
| value_loss         | 0.0021835319  |
--------------------------------------
--------------------------------------
| approxkl           | 9.568516e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.82          |
| explained_variance | -0.00136      |
| fps                | 24            |
| n_updates          | 1030          |
| policy_entropy     | 1.4921162     |
| policy_loss        | 5.2915886e-05 |
| serial_times

Round done
--------------------------------------
| approxkl           | 7.50356e-05   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.82          |
| explained_variance | 0.00331       |
| fps                | 11            |
| n_updates          | 1043          |
| policy_entropy     | 1.8761525     |
| policy_loss        | -0.0017708484 |
| serial_timesteps   | 133504        |
| time_elapsed       | 8.24e+03      |
| total_timesteps    | 133504        |
| value_loss         | 0.005592731   |
--------------------------------------
--------------------------------------
| approxkl           | 6.679182e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.82          |
| explained_variance | 0.000616      |
| fps                | 24            |
| n_updates          | 1044          |
| policy_entropy     | 1.9045084     |
| policy_loss        | -8.595828e-05 |
| serial_times

--------------------------------------
| approxkl           | 1.963419e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.84          |
| explained_variance | -0.000135     |
| fps                | 24            |
| n_updates          | 1057          |
| policy_entropy     | 2.095324      |
| policy_loss        | 8.9693815e-05 |
| serial_timesteps   | 135296        |
| time_elapsed       | 8.35e+03      |
| total_timesteps    | 135296        |
| value_loss         | 0.009027736   |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 6.6740984e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.84          |
| explained_variance | 0.00198       |
| fps                | 9             |
| n_updates          | 1058          |
| policy_entropy     | 2.0953617     |
| policy_loss        | -0.0016607977 |
| serial_times

--------------------------------------
| approxkl           | 5.8189755e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.84          |
| explained_variance | 0.000257      |
| fps                | 23            |
| n_updates          | 1071          |
| policy_entropy     | 2.1833513     |
| policy_loss        | -0.0016566813 |
| serial_timesteps   | 137088        |
| time_elapsed       | 8.46e+03      |
| total_timesteps    | 137088        |
| value_loss         | 0.006097989   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 0.00015370316 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.84          |
| explained_variance | -0.014        |
| fps                | 10            |
| n_updates          | 1072          |
| policy_entropy     | 2.204743      |
| policy_loss        | -0.0017546453 |
| serial_times

-------------------------------------
| approxkl           | 5.017931e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.2e+03      |
| ep_reward_mean     | 4.8          |
| explained_variance | 0.00475      |
| fps                | 23           |
| n_updates          | 1085         |
| policy_entropy     | 2.0888004    |
| policy_loss        | -0.000321161 |
| serial_timesteps   | 138880       |
| time_elapsed       | 8.58e+03     |
| total_timesteps    | 138880       |
| value_loss         | 0.0048668715 |
-------------------------------------
--------------------------------------
| approxkl           | 1.4589894e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.8           |
| explained_variance | -0.00198      |
| fps                | 23            |
| n_updates          | 1086          |
| policy_entropy     | 2.0934036     |
| policy_loss        | -0.0005147457 |
| serial_timesteps   | 139008        |
|

--------------------------------------
| approxkl           | 1.0745862e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.8           |
| explained_variance | 0.00131       |
| fps                | 24            |
| n_updates          | 1099          |
| policy_entropy     | 2.2439697     |
| policy_loss        | -0.0002900809 |
| serial_timesteps   | 140672        |
| time_elapsed       | 8.69e+03      |
| total_timesteps    | 140672        |
| value_loss         | 0.00606664    |
--------------------------------------
--------------------------------------
| approxkl           | 3.1795034e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.8           |
| explained_variance | 0.00126       |
| fps                | 24            |
| n_updates          | 1100          |
| policy_entropy     | 2.2487087     |
| policy_loss        | -8.203089e-05 |
| serial_timesteps   | 14

---------------------------------------
| approxkl           | 2.7975413e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.87           |
| explained_variance | 0.00147        |
| fps                | 24             |
| n_updates          | 1113           |
| policy_entropy     | 2.2741551      |
| policy_loss        | -0.00062549114 |
| serial_timesteps   | 142464         |
| time_elapsed       | 8.81e+03       |
| total_timesteps    | 142464         |
| value_loss         | 0.004248708    |
---------------------------------------
--------------------------------------
| approxkl           | 5.5341232e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.87          |
| explained_variance | 0.00324       |
| fps                | 23            |
| n_updates          | 1114          |
| policy_entropy     | 2.2849183     |
| policy_loss        | -0.0013972744 |
| serial_t

---------------------------------------
| approxkl           | 1.3517336e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.87           |
| explained_variance | 0.00256        |
| fps                | 24             |
| n_updates          | 1127           |
| policy_entropy     | 2.3106585      |
| policy_loss        | -0.00028372556 |
| serial_timesteps   | 144256         |
| time_elapsed       | 8.91e+03       |
| total_timesteps    | 144256         |
| value_loss         | 0.003135631    |
---------------------------------------
---------------------------------------
| approxkl           | 1.453013e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.87           |
| explained_variance | 0.000517       |
| fps                | 24             |
| n_updates          | 1128           |
| policy_entropy     | 2.3024735      |
| policy_loss        | -0.00081594987 |


Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 5.912908e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.81          |
| explained_variance | -0.000261     |
| fps                | 8             |
| n_updates          | 1141          |
| policy_entropy     | 2.2706718     |
| policy_loss        | -0.0015512854 |
| serial_timesteps   | 146048        |
| time_elapsed       | 9.02e+03      |
| total_timesteps    | 146048        |
| value_loss         | 0.0044206157  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006702427  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.81          |
| explained_variance | 0.0788        |
| fps                | 24            |
| n_updates          | 1142          |
| policy_entropy     | 2.21119

Round done
---------------------------------------
| approxkl           | 1.2079358e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.17e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | 0.00146        |
| fps                | 11             |
| n_updates          | 1155           |
| policy_entropy     | 2.272933       |
| policy_loss        | -0.00016820058 |
| serial_timesteps   | 147840         |
| time_elapsed       | 9.13e+03       |
| total_timesteps    | 147840         |
| value_loss         | 0.0041193767   |
---------------------------------------
--------------------------------------
| approxkl           | 2.6313105e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.81          |
| explained_variance | 0.00182       |
| fps                | 24            |
| n_updates          | 1156          |
| policy_entropy     | 2.2716227     |
| policy_loss        | -0.0001568459 |

--------------------------------------
| approxkl           | 2.0375348e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.75          |
| explained_variance | 0.00211       |
| fps                | 24            |
| n_updates          | 1169          |
| policy_entropy     | 2.2028928     |
| policy_loss        | 4.7273934e-06 |
| serial_timesteps   | 149632        |
| time_elapsed       | 9.25e+03      |
| total_timesteps    | 149632        |
| value_loss         | 0.009164441   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 1.1762933e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.75           |
| explained_variance | 0.00184        |
| fps                | 11             |
| n_updates          | 1170           |
| policy_entropy     | 2.2044764      |
| policy_loss        | -0.00016280822 |
| se

--------------------------------------
| approxkl           | 9.744642e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.75          |
| explained_variance | 9.7e-05       |
| fps                | 24            |
| n_updates          | 1183          |
| policy_entropy     | 2.1656973     |
| policy_loss        | -0.0007202327 |
| serial_timesteps   | 151424        |
| time_elapsed       | 9.36e+03      |
| total_timesteps    | 151424        |
| value_loss         | 0.00502817    |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 6.4445667e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.75          |
| explained_variance | -0.000629     |
| fps                | 10            |
| n_updates          | 1184          |
| policy_entropy     | 2.1591287     |
| policy_loss        | 2.4607405e-05 |
| serial_times

--------------------------------------
| approxkl           | 3.9993342e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.75          |
| explained_variance | -0.000329     |
| fps                | 24            |
| n_updates          | 1197          |
| policy_entropy     | 2.097584      |
| policy_loss        | -0.0003271252 |
| serial_timesteps   | 153216        |
| time_elapsed       | 9.48e+03      |
| total_timesteps    | 153216        |
| value_loss         | 0.005847292   |
--------------------------------------
---------------------------------------
| approxkl           | 2.7083163e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.75           |
| explained_variance | 0.0009         |
| fps                | 23             |
| n_updates          | 1198           |
| policy_entropy     | 2.1012492      |
| policy_loss        | -0.00018941797 |
| serial_timest

---------------------------------------
| approxkl           | 5.075819e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.85           |
| explained_variance | 0.00149        |
| fps                | 24             |
| n_updates          | 1211           |
| policy_entropy     | 2.0693502      |
| policy_loss        | -5.2606687e-05 |
| serial_timesteps   | 155008         |
| time_elapsed       | 9.59e+03       |
| total_timesteps    | 155008         |
| value_loss         | 0.0023332988   |
---------------------------------------
---------------------------------------
| approxkl           | 8.442197e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.85           |
| explained_variance | -0.00087       |
| fps                | 24             |
| n_updates          | 1212           |
| policy_entropy     | 2.067373       |
| policy_loss        | -2.9254705e-05 |


--------------------------------------
| approxkl           | 4.4021976e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.12e+03      |
| ep_reward_mean     | 4.81          |
| explained_variance | -0.00108      |
| fps                | 23            |
| n_updates          | 1225          |
| policy_entropy     | 2.1418753     |
| policy_loss        | 0.00021801563 |
| serial_timesteps   | 156800        |
| time_elapsed       | 9.7e+03       |
| total_timesteps    | 156800        |
| value_loss         | 0.005874608   |
--------------------------------------
---------------------------------------
| approxkl           | 1.2685538e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | -0.00178       |
| fps                | 24             |
| n_updates          | 1226           |
| policy_entropy     | 2.142332       |
| policy_loss        | -0.00014546886 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 5.7710963e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | 0.000729       |
| fps                | 10             |
| n_updates          | 1239           |
| policy_entropy     | 2.1435335      |
| policy_loss        | -0.00011978112 |
| serial_timesteps   | 158592         |
| time_elapsed       | 9.8e+03        |
| total_timesteps    | 158592         |
| value_loss         | 0.0060859015   |
---------------------------------------
--------------------------------------
| approxkl           | 2.8976235e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.12e+03      |
| ep_reward_mean     | 4.81          |
| explained_variance | 0.00124       |
| fps                | 24            |
| n_updates          | 1240          |
| policy_entropy     | 2.1485872     |
| policy_loss        | 4.1028485e-05 |

--------------------------------------
| approxkl           | 1.1141624e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.12e+03      |
| ep_reward_mean     | 4.81          |
| explained_variance | 8.4e-05       |
| fps                | 23            |
| n_updates          | 1253          |
| policy_entropy     | 2.153652      |
| policy_loss        | -7.156283e-05 |
| serial_timesteps   | 160384        |
| time_elapsed       | 9.92e+03      |
| total_timesteps    | 160384        |
| value_loss         | 0.0052347565  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 4.483219e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | -0.000411      |
| fps                | 11             |
| n_updates          | 1254           |
| policy_entropy     | 2.1546943      |
| policy_loss        | -0.00038662925 |
| se

Round done
--------------------------------------
| approxkl           | 4.529171e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.82          |
| explained_variance | -0.000673     |
| fps                | 11            |
| n_updates          | 1267          |
| policy_entropy     | 2.1755624     |
| policy_loss        | 0.00013760943 |
| serial_timesteps   | 162176        |
| time_elapsed       | 1e+04         |
| total_timesteps    | 162176        |
| value_loss         | 0.0024484368  |
--------------------------------------
---------------------------------------
| approxkl           | 2.4489145e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.82           |
| explained_variance | -0.000982      |
| fps                | 24             |
| n_updates          | 1268           |
| policy_entropy     | 2.1728275      |
| policy_loss        | -0.00019036047 |
| se

--------------------------------------
| approxkl           | 1.4314815e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | -0.00915      |
| fps                | 23            |
| n_updates          | 1281          |
| policy_entropy     | 2.1775217     |
| policy_loss        | 0.00012925267 |
| serial_timesteps   | 163968        |
| time_elapsed       | 1.01e+04      |
| total_timesteps    | 163968        |
| value_loss         | 0.004775698   |
--------------------------------------
---------------------------------------
| approxkl           | 4.292373e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.79           |
| explained_variance | -5.6e-05       |
| fps                | 23             |
| n_updates          | 1282           |
| policy_entropy     | 2.1872296      |
| policy_loss        | -0.00010882225 |
| serial_timest

--------------------------------------
| approxkl           | 1.6696351e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | 0.000121      |
| fps                | 24            |
| n_updates          | 1295          |
| policy_entropy     | 2.1660764     |
| policy_loss        | -0.000840161  |
| serial_timesteps   | 165760        |
| time_elapsed       | 1.03e+04      |
| total_timesteps    | 165760        |
| value_loss         | 0.005100629   |
--------------------------------------
---------------------------------------
| approxkl           | 1.4980151e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.79           |
| explained_variance | -0.000196      |
| fps                | 23             |
| n_updates          | 1296           |
| policy_entropy     | 2.1567001      |
| policy_loss        | -0.00018812716 |
| serial_timest

--------------------------------------
| approxkl           | 7.397478e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | -0.00983      |
| fps                | 24            |
| n_updates          | 1309          |
| policy_entropy     | 2.1453722     |
| policy_loss        | -0.0021451954 |
| serial_timesteps   | 167552        |
| time_elapsed       | 1.04e+04      |
| total_timesteps    | 167552        |
| value_loss         | 0.0031404952  |
--------------------------------------
--------------------------------------
| approxkl           | 6.1304716e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | 0.0041        |
| fps                | 24            |
| n_updates          | 1310          |
| policy_entropy     | 2.1688328     |
| policy_loss        | 0.00024538767 |
| serial_timesteps   | 16

--------------------------------------
| approxkl           | 2.6706325e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | 0.00151       |
| fps                | 24            |
| n_updates          | 1323          |
| policy_entropy     | 2.2300274     |
| policy_loss        | -9.627268e-05 |
| serial_timesteps   | 169344        |
| time_elapsed       | 1.05e+04      |
| total_timesteps    | 169344        |
| value_loss         | 0.0013805823  |
--------------------------------------
---------------------------------------
| approxkl           | 1.1881488e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.79           |
| explained_variance | -0.000735      |
| fps                | 24             |
| n_updates          | 1324           |
| policy_entropy     | 2.2238455      |
| policy_loss        | -0.00096973404 |
| serial_timest

Round done
--------------------------------------
| approxkl           | 1.8121053e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | -0.000123     |
| fps                | 11            |
| n_updates          | 1337          |
| policy_entropy     | 2.1175463     |
| policy_loss        | -0.0006393604 |
| serial_timesteps   | 171136        |
| time_elapsed       | 1.06e+04      |
| total_timesteps    | 171136        |
| value_loss         | 0.0020692244  |
--------------------------------------
---------------------------------------
| approxkl           | 1.37249035e-05 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.79           |
| explained_variance | 0.000794       |
| fps                | 24             |
| n_updates          | 1338           |
| policy_entropy     | 2.106226       |
| policy_loss        | -4.8857182e-05 |
| se

--------------------------------------
| approxkl           | 4.8028123e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.85          |
| explained_variance | 0.00011       |
| fps                | 24            |
| n_updates          | 1351          |
| policy_entropy     | 2.123509      |
| policy_loss        | 0.00031741755 |
| serial_timesteps   | 172928        |
| time_elapsed       | 1.07e+04      |
| total_timesteps    | 172928        |
| value_loss         | 0.012512642   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 3.7586136e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.85          |
| explained_variance | 0.000632      |
| fps                | 11            |
| n_updates          | 1352          |
| policy_entropy     | 2.1208398     |
| policy_loss        | 4.5811757e-06 |
| serial_times

Round done
---------------------------------------
| approxkl           | 6.0735e-06     |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | -0.00235       |
| fps                | 11             |
| n_updates          | 1365           |
| policy_entropy     | 2.0327427      |
| policy_loss        | -0.00032072328 |
| serial_timesteps   | 174720         |
| time_elapsed       | 1.08e+04       |
| total_timesteps    | 174720         |
| value_loss         | 0.004121485    |
---------------------------------------
---------------------------------------
| approxkl           | 1.354644e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | -0.00094       |
| fps                | 23             |
| n_updates          | 1366           |
| policy_entropy     | 2.0419252      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 1.6910146e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | 0.00108        |
| fps                | 23             |
| n_updates          | 1379           |
| policy_entropy     | 2.102902       |
| policy_loss        | -0.00051370077 |
| serial_timesteps   | 176512         |
| time_elapsed       | 1.09e+04       |
| total_timesteps    | 176512         |
| value_loss         | 0.0036525764   |
---------------------------------------
---------------------------------------
| approxkl           | 1.601454e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | -0.00176       |
| fps                | 23             |
| n_updates          | 1380           |
| policy_entropy     | 2.113957       |
| policy_loss        | -6.0837716e-05 |


Round done
---------------------------------------
| approxkl           | 9.2037935e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.9            |
| explained_variance | 0.0232         |
| fps                | 11             |
| n_updates          | 1393           |
| policy_entropy     | 2.1076784      |
| policy_loss        | -0.00016103219 |
| serial_timesteps   | 178304         |
| time_elapsed       | 1.1e+04        |
| total_timesteps    | 178304         |
| value_loss         | 0.0033957916   |
---------------------------------------
--------------------------------------
| approxkl           | 3.7123261e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.9           |
| explained_variance | 0.00873       |
| fps                | 24            |
| n_updates          | 1394          |
| policy_entropy     | 2.134971      |
| policy_loss        | -7.873401e-06 |

Stage done
--------------------------------------
| approxkl           | 4.0292393e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.9           |
| explained_variance | -9.61e-05     |
| fps                | 10            |
| n_updates          | 1407          |
| policy_entropy     | 2.1805956     |
| policy_loss        | 0.00017340109 |
| serial_timesteps   | 180096        |
| time_elapsed       | 1.11e+04      |
| total_timesteps    | 180096        |
| value_loss         | 0.0030556885  |
--------------------------------------
---------------------------------------
| approxkl           | 7.3249754e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.9            |
| explained_variance | 0.000515       |
| fps                | 24             |
| n_updates          | 1408           |
| policy_entropy     | 2.1825452      |
| policy_loss        | -5.4148957e-05 |
| se

Round done
--------------------------------------
| approxkl           | 1.8540638e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.9           |
| explained_variance | 0.000925      |
| fps                | 11            |
| n_updates          | 1421          |
| policy_entropy     | 2.1870003     |
| policy_loss        | 5.749939e-05  |
| serial_timesteps   | 181888        |
| time_elapsed       | 1.12e+04      |
| total_timesteps    | 181888        |
| value_loss         | 0.00803684    |
--------------------------------------
---------------------------------------
| approxkl           | 6.2642647e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.9            |
| explained_variance | -0.000266      |
| fps                | 23             |
| n_updates          | 1422           |
| policy_entropy     | 2.1874313      |
| policy_loss        | -0.00013132393 |
| se

---------------------------------------
| approxkl           | 1.51056065e-05 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.93           |
| explained_variance | -0.00198       |
| fps                | 23             |
| n_updates          | 1435           |
| policy_entropy     | 2.183926       |
| policy_loss        | -9.0123154e-05 |
| serial_timesteps   | 183680         |
| time_elapsed       | 1.14e+04       |
| total_timesteps    | 183680         |
| value_loss         | 0.0041396134   |
---------------------------------------
--------------------------------------
| approxkl           | 4.192457e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.93          |
| explained_variance | 0.000352      |
| fps                | 24            |
| n_updates          | 1436          |
| policy_entropy     | 2.1761131     |
| policy_loss        | 2.8432347e-05 |
| serial_t

---------------------------------------
| approxkl           | 9.4180996e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.93           |
| explained_variance | 0.00107        |
| fps                | 24             |
| n_updates          | 1449           |
| policy_entropy     | 2.1531436      |
| policy_loss        | -0.00011065602 |
| serial_timesteps   | 185472         |
| time_elapsed       | 1.15e+04       |
| total_timesteps    | 185472         |
| value_loss         | 0.002804596    |
---------------------------------------
--------------------------------------
| approxkl           | 1.0032209e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.93          |
| explained_variance | -0.000392     |
| fps                | 24            |
| n_updates          | 1450          |
| policy_entropy     | 2.1564698     |
| policy_loss        | -0.0007655383 |
| serial_t

--------------------------------------
| approxkl           | 1.4468114e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.89          |
| explained_variance | 0.00129       |
| fps                | 23            |
| n_updates          | 1463          |
| policy_entropy     | 2.1813283     |
| policy_loss        | -8.048862e-05 |
| serial_timesteps   | 187264        |
| time_elapsed       | 1.16e+04      |
| total_timesteps    | 187264        |
| value_loss         | 0.005791723   |
--------------------------------------
---------------------------------------
| approxkl           | 9.008452e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.89           |
| explained_variance | -0.000404      |
| fps                | 23             |
| n_updates          | 1464           |
| policy_entropy     | 2.1822815      |
| policy_loss        | -0.00015600212 |
| serial_timest

---------------------------------------
| approxkl           | 1.1275874e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.89           |
| explained_variance | 8.77e-05       |
| fps                | 24             |
| n_updates          | 1477           |
| policy_entropy     | 2.202466       |
| policy_loss        | -5.6743622e-05 |
| serial_timesteps   | 189056         |
| time_elapsed       | 1.17e+04       |
| total_timesteps    | 189056         |
| value_loss         | 0.010356717    |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
-------------------------------------
| approxkl           | 6.465164e-05 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.2e+03      |
| ep_reward_mean     | 4.86         |
| explained_variance | 0.163        |
| fps                | 8            |
| n_updates          | 1478         |
| policy_entropy     | 

---------------------------------------
| approxkl           | 1.7804275e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.86           |
| explained_variance | -0.000561      |
| fps                | 24             |
| n_updates          | 1491           |
| policy_entropy     | 2.1793838      |
| policy_loss        | -5.3824857e-05 |
| serial_timesteps   | 190848         |
| time_elapsed       | 1.18e+04       |
| total_timesteps    | 190848         |
| value_loss         | 0.004221468    |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 8.8505215e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.86          |
| explained_variance | -0.00019      |
| fps                | 10            |
| n_updates          | 1492          |
| policy_entropy     | 2.1803124     |
| policy_loss        | 4.5634806e-06 |

Round done
---------------------------------------
| approxkl           | 4.3496692e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | 0.00539        |
| fps                | 10             |
| n_updates          | 1505           |
| policy_entropy     | 2.1986463      |
| policy_loss        | -7.9996884e-05 |
| serial_timesteps   | 192640         |
| time_elapsed       | 1.19e+04       |
| total_timesteps    | 192640         |
| value_loss         | 0.0035041873   |
---------------------------------------
--------------------------------------
| approxkl           | 7.0161745e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.81          |
| explained_variance | 0.000962      |
| fps                | 24            |
| n_updates          | 1506          |
| policy_entropy     | 2.1968458     |
| policy_loss        | -0.0004055649 |

---------------------------------------
| approxkl           | 2.1926357e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | 0.000295       |
| fps                | 23             |
| n_updates          | 1519           |
| policy_entropy     | 2.179719       |
| policy_loss        | -0.00012570992 |
| serial_timesteps   | 194432         |
| time_elapsed       | 1.2e+04        |
| total_timesteps    | 194432         |
| value_loss         | 0.00434404     |
---------------------------------------
---------------------------------------
| approxkl           | 1.3776256e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.81           |
| explained_variance | 0.000353       |
| fps                | 23             |
| n_updates          | 1520           |
| policy_entropy     | 2.1813428      |
| policy_loss        | -0.00024529546 |


Stage done
---------------------------------------
| approxkl           | 3.0680155e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.79           |
| explained_variance | -0.000709      |
| fps                | 9              |
| n_updates          | 1533           |
| policy_entropy     | 2.1892748      |
| policy_loss        | -0.00026099943 |
| serial_timesteps   | 196224         |
| time_elapsed       | 1.21e+04       |
| total_timesteps    | 196224         |
| value_loss         | 0.0069370186   |
---------------------------------------
---------------------------------------
| approxkl           | 1.0608694e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.79           |
| explained_variance | -0.000848      |
| fps                | 24             |
| n_updates          | 1534           |
| policy_entropy     | 2.196963       |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 3.1924184e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | -0.0034       |
| fps                | 24            |
| n_updates          | 1547          |
| policy_entropy     | 2.2775207     |
| policy_loss        | -0.0002267547 |
| serial_timesteps   | 198016        |
| time_elapsed       | 1.22e+04      |
| total_timesteps    | 198016        |
| value_loss         | 0.006080552   |
--------------------------------------
--------------------------------------
| approxkl           | 1.0144571e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | -0.00179      |
| fps                | 24            |
| n_updates          | 1548          |
| policy_entropy     | 2.286668      |
| policy_loss        | 0.00034188293 |
| serial_timesteps   | 19

Round done
--------------------------------------
| approxkl           | 3.8030848e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.79          |
| explained_variance | 0.000154      |
| fps                | 11            |
| n_updates          | 1561          |
| policy_entropy     | 2.2840004     |
| policy_loss        | 2.9724091e-05 |
| serial_timesteps   | 199808        |
| time_elapsed       | 1.23e+04      |
| total_timesteps    | 199808        |
| value_loss         | 0.004979131   |
--------------------------------------
---------------------------------------
| approxkl           | 9.3400575e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.79           |
| explained_variance | -9.74e-05      |
| fps                | 23             |
| n_updates          | 1562           |
| policy_entropy     | 2.2812202      |
| policy_loss        | -0.00016378425 |
| se

Round done
---------------------------------------
| approxkl           | 1.2968521e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.75           |
| explained_variance | -0.00577       |
| fps                | 11             |
| n_updates          | 1575           |
| policy_entropy     | 2.3164692      |
| policy_loss        | -3.4324825e-05 |
| serial_timesteps   | 201600         |
| time_elapsed       | 1.24e+04       |
| total_timesteps    | 201600         |
| value_loss         | 0.0033388846   |
---------------------------------------
---------------------------------------
| approxkl           | 4.3797576e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.75           |
| explained_variance | 0.00329        |
| fps                | 24             |
| n_updates          | 1576           |
| policy_entropy     | 2.3175468      |
| policy_loss        | -0.000

Stage done
--------------------------------------
| approxkl           | 9.177597e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.75          |
| explained_variance | 0.000461      |
| fps                | 9             |
| n_updates          | 1589          |
| policy_entropy     | 2.3176618     |
| policy_loss        | 6.0535967e-05 |
| serial_timesteps   | 203392        |
| time_elapsed       | 1.26e+04      |
| total_timesteps    | 203392        |
| value_loss         | 0.0041838167  |
--------------------------------------
---------------------------------------
| approxkl           | 3.3485185e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.75           |
| explained_variance | -0.00036       |
| fps                | 24             |
| n_updates          | 1590           |
| policy_entropy     | 2.3213992      |
| policy_loss        | -0.00021142513 |
| se

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 3.4362356e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.77           |
| explained_variance | 0.023          |
| fps                | 8              |
| n_updates          | 1603           |
| policy_entropy     | 2.3139         |
| policy_loss        | -0.00030251965 |
| serial_timesteps   | 205184         |
| time_elapsed       | 1.27e+04       |
| total_timesteps    | 205184         |
| value_loss         | 0.010337315    |
---------------------------------------
---------------------------------------
| approxkl           | 1.1465461e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.77           |
| explained_variance | 0.0229         |
| fps                | 24             |
| n_updates          | 1604           |
| polic

Stage done
---------------------------------------
| approxkl           | 3.6277622e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.17e+03       |
| ep_reward_mean     | 4.7            |
| explained_variance | -3.97e-05      |
| fps                | 10             |
| n_updates          | 1617           |
| policy_entropy     | 2.3538742      |
| policy_loss        | -0.00013128482 |
| serial_timesteps   | 206976         |
| time_elapsed       | 1.28e+04       |
| total_timesteps    | 206976         |
| value_loss         | 0.005868958    |
---------------------------------------
--------------------------------------
| approxkl           | 1.626612e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.7           |
| explained_variance | 0.00115       |
| fps                | 24            |
| n_updates          | 1618          |
| policy_entropy     | 2.3557985     |
| policy_loss        | 2.5400892e-05 |

Stage done
---------------------------------------
| approxkl           | 2.3850575e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.17e+03       |
| ep_reward_mean     | 4.7            |
| explained_variance | 0.000631       |
| fps                | 10             |
| n_updates          | 1631           |
| policy_entropy     | 2.3794262      |
| policy_loss        | -6.9603324e-05 |
| serial_timesteps   | 208768         |
| time_elapsed       | 1.29e+04       |
| total_timesteps    | 208768         |
| value_loss         | 0.003394335    |
---------------------------------------
-------------------------------------
| approxkl           | 9.620552e-07 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.17e+03     |
| ep_reward_mean     | 4.7          |
| explained_variance | 5.56e-05     |
| fps                | 23           |
| n_updates          | 1632         |
| policy_entropy     | 2.379529     |
| policy_loss        | -9.65558e-05 |
| serial_

---------------------------------------
| approxkl           | 2.93436e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.7            |
| explained_variance | -0.00408       |
| fps                | 24             |
| n_updates          | 1645           |
| policy_entropy     | 2.3716388      |
| policy_loss        | -8.6588785e-05 |
| serial_timesteps   | 210560         |
| time_elapsed       | 1.3e+04        |
| total_timesteps    | 210560         |
| value_loss         | 0.006045272    |
---------------------------------------
---------------------------------------
| approxkl           | 1.3289048e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.7            |
| explained_variance | 0.00225        |
| fps                | 24             |
| n_updates          | 1646           |
| policy_entropy     | 2.3684974      |
| policy_loss        | -2.0298176e-05 |


--------------------------------------
| approxkl           | 1.1402435e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.62          |
| explained_variance | 0.000371      |
| fps                | 23            |
| n_updates          | 1659          |
| policy_entropy     | 2.344007      |
| policy_loss        | 3.818143e-05  |
| serial_timesteps   | 212352        |
| time_elapsed       | 1.31e+04      |
| total_timesteps    | 212352        |
| value_loss         | 0.0069983904  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.2929247e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.62          |
| explained_variance | 0.00202       |
| fps                | 11            |
| n_updates          | 1660          |
| policy_entropy     | 2.3431213     |
| policy_loss        | -0.0002876874 |
| serial_times

--------------------------------------
| approxkl           | 0.00011820661 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.58          |
| explained_variance | 0.00885       |
| fps                | 24            |
| n_updates          | 1673          |
| policy_entropy     | 2.3235564     |
| policy_loss        | -0.0020402204 |
| serial_timesteps   | 214144        |
| time_elapsed       | 1.32e+04      |
| total_timesteps    | 214144        |
| value_loss         | 0.0020541495  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 5.7309553e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.58          |
| explained_variance | -0.00578      |
| fps                | 12            |
| n_updates          | 1674          |
| policy_entropy     | 2.3360589     |
| policy_loss        | 3.908202e-05  |
| serial_times

---------------------------------------
| approxkl           | 2.4329538e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.58           |
| explained_variance | -0.00105       |
| fps                | 24             |
| n_updates          | 1687           |
| policy_entropy     | 2.3258328      |
| policy_loss        | -0.00027955417 |
| serial_timesteps   | 215936         |
| time_elapsed       | 1.33e+04       |
| total_timesteps    | 215936         |
| value_loss         | 0.0036078596   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 2.1616684e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.58           |
| explained_variance | -0.00121       |
| fps                | 11             |
| n_updates          | 1688           |
| policy_entropy     | 2.3226943      |
| policy_loss        | -2.048

Round done
--------------------------------------
| approxkl           | 2.6174837e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.56          |
| explained_variance | 0.000512      |
| fps                | 11            |
| n_updates          | 1701          |
| policy_entropy     | 2.3181047     |
| policy_loss        | -2.537854e-05 |
| serial_timesteps   | 217728        |
| time_elapsed       | 1.35e+04      |
| total_timesteps    | 217728        |
| value_loss         | 0.008672834   |
--------------------------------------
--------------------------------------
| approxkl           | 6.9443644e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.56          |
| explained_variance | 0.00206       |
| fps                | 24            |
| n_updates          | 1702          |
| policy_entropy     | 2.3193812     |
| policy_loss        | 2.1487474e-05 |
| serial_times

--------------------------------------
| approxkl           | 2.3160689e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.11e+03      |
| ep_reward_mean     | 4.53          |
| explained_variance | 0.00171       |
| fps                | 24            |
| n_updates          | 1715          |
| policy_entropy     | 2.3324113     |
| policy_loss        | 3.5837293e-06 |
| serial_timesteps   | 219520        |
| time_elapsed       | 1.36e+04      |
| total_timesteps    | 219520        |
| value_loss         | 0.007396913   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 3.5922625e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.11e+03       |
| ep_reward_mean     | 4.53           |
| explained_variance | -0.0013        |
| fps                | 11             |
| n_updates          | 1716           |
| policy_entropy     | 2.3325093      |
| policy_loss        | -0.00016912445 |
| se

---------------------------------------
| approxkl           | 7.17065e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.54           |
| explained_variance | -0.00426       |
| fps                | 23             |
| n_updates          | 1729           |
| policy_entropy     | 2.308682       |
| policy_loss        | -0.00034222566 |
| serial_timesteps   | 221312         |
| time_elapsed       | 1.37e+04       |
| total_timesteps    | 221312         |
| value_loss         | 0.0044710557   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.04727005e-05 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.54           |
| explained_variance | -0.00109       |
| fps                | 12             |
| n_updates          | 1730           |
| policy_entropy     | 2.3040977      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 1.4251066e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.54           |
| explained_variance | 0.000456       |
| fps                | 24             |
| n_updates          | 1743           |
| policy_entropy     | 2.3038602      |
| policy_loss        | -0.00015665591 |
| serial_timesteps   | 223104         |
| time_elapsed       | 1.38e+04       |
| total_timesteps    | 223104         |
| value_loss         | 0.007785854    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 3.8980484e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.1e+03       |
| ep_reward_mean     | 4.54          |
| explained_variance | -0.000209     |
| fps                | 9             |
| n_updates          | 1744          |
| policy_entropy     | 2.3057365     |
| policy_loss        | -0.0005541891 |

-------------------------------------
| approxkl           | 5.136762e-07 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.1e+03      |
| ep_reward_mean     | 4.54         |
| explained_variance | -0.000638    |
| fps                | 23           |
| n_updates          | 1757         |
| policy_entropy     | 2.3235993    |
| policy_loss        | -0.000141548 |
| serial_timesteps   | 224896       |
| time_elapsed       | 1.39e+04     |
| total_timesteps    | 224896       |
| value_loss         | 0.0042224177 |
-------------------------------------
Round done
---------------------------------------
| approxkl           | 3.0369624e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.54           |
| explained_variance | 0.000172       |
| fps                | 11             |
| n_updates          | 1758           |
| policy_entropy     | 2.3249335      |
| policy_loss        | -0.00023701787 |
| serial_timesteps 

Round done
--------------------------------------
| approxkl           | 0.00013392791 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.58          |
| explained_variance | -0.00274      |
| fps                | 11            |
| n_updates          | 1771          |
| policy_entropy     | 2.2303753     |
| policy_loss        | -0.0024159718 |
| serial_timesteps   | 226688        |
| time_elapsed       | 1.4e+04       |
| total_timesteps    | 226688        |
| value_loss         | 0.0046524475  |
--------------------------------------
---------------------------------------
| approxkl           | 0.000100575555 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.58           |
| explained_variance | 0.013          |
| fps                | 23             |
| n_updates          | 1772           |
| policy_entropy     | 2.225305       |
| policy_loss        | -0.00030222908 |
| se

---------------------------------------
| approxkl           | 1.6032378e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.58           |
| explained_variance | 0.00134        |
| fps                | 24             |
| n_updates          | 1785           |
| policy_entropy     | 2.2856643      |
| policy_loss        | -5.6803226e-05 |
| serial_timesteps   | 228480         |
| time_elapsed       | 1.41e+04       |
| total_timesteps    | 228480         |
| value_loss         | 0.005612937    |
---------------------------------------
---------------------------------------
| approxkl           | 1.5383096e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.58           |
| explained_variance | -0.00262       |
| fps                | 24             |
| n_updates          | 1786           |
| policy_entropy     | 2.2863665      |
| policy_loss        | -0.00016510859 |


---------------------------------------
| approxkl           | 1.8521938e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.6            |
| explained_variance | 0.00251        |
| fps                | 23             |
| n_updates          | 1799           |
| policy_entropy     | 2.200658       |
| policy_loss        | -0.00035932288 |
| serial_timesteps   | 230272         |
| time_elapsed       | 1.42e+04       |
| total_timesteps    | 230272         |
| value_loss         | 0.002872909    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.1638363e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.6            |
| explained_variance | -0.00395       |
| fps                | 9              |
| n_updates          | 1800           |
| policy_entropy     | 2.1907215      |
| policy_loss        | -0.000

Round done
-------------------------------------
| approxkl           | 0.002230472  |
| clipfrac           | 0.009765625  |
| ep_len_mean        | 3.13e+03     |
| ep_reward_mean     | 4.6          |
| explained_variance | 0.0113       |
| fps                | 10           |
| n_updates          | 1813         |
| policy_entropy     | 1.7350409    |
| policy_loss        | -0.008435059 |
| serial_timesteps   | 232064       |
| time_elapsed       | 1.43e+04     |
| total_timesteps    | 232064       |
| value_loss         | 0.002914146  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0005177795 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.13e+03     |
| ep_reward_mean     | 4.6          |
| explained_variance | -0.000947    |
| fps                | 23           |
| n_updates          | 1814         |
| policy_entropy     | 1.8681622    |
| policy_loss        | 0.0027094577 |
| serial_timesteps   | 232192       |
|

Round done
--------------------------------------
| approxkl           | 2.4868998e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.6           |
| explained_variance | -5.26e-05     |
| fps                | 11            |
| n_updates          | 1827          |
| policy_entropy     | 2.0517335     |
| policy_loss        | 0.00018613227 |
| serial_timesteps   | 233856        |
| time_elapsed       | 1.44e+04      |
| total_timesteps    | 233856        |
| value_loss         | 0.0043921173  |
--------------------------------------
---------------------------------------
| approxkl           | 8.994793e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.6            |
| explained_variance | 0.00398        |
| fps                | 24             |
| n_updates          | 1828           |
| policy_entropy     | 2.0640635      |
| policy_loss        | -0.00027082674 |
| se

--------------------------------------
| approxkl           | 2.2323933e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.17e+03      |
| ep_reward_mean     | 4.65          |
| explained_variance | -0.00108      |
| fps                | 24            |
| n_updates          | 1841          |
| policy_entropy     | 2.1015975     |
| policy_loss        | 0.00027659908 |
| serial_timesteps   | 235648        |
| time_elapsed       | 1.46e+04      |
| total_timesteps    | 235648        |
| value_loss         | 0.0038570755  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 2.5789534e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.17e+03       |
| ep_reward_mean     | 4.65           |
| explained_variance | -0.00116       |
| fps                | 11             |
| n_updates          | 1842           |
| policy_entropy     | 2.115135       |
| policy_loss        | -0.00084689073 |
| se

Stage done
---------------------------------------
| approxkl           | 4.64154e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.66           |
| explained_variance | -0.0017        |
| fps                | 9              |
| n_updates          | 1855           |
| policy_entropy     | 2.225019       |
| policy_loss        | -0.00017077103 |
| serial_timesteps   | 237440         |
| time_elapsed       | 1.47e+04       |
| total_timesteps    | 237440         |
| value_loss         | 0.010226279    |
---------------------------------------
---------------------------------------
| approxkl           | 1.2388506e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.66           |
| explained_variance | 0.0011         |
| fps                | 24             |
| n_updates          | 1856           |
| policy_entropy     | 2.2233877      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 2.1814353e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | 0.00126       |
| fps                | 24            |
| n_updates          | 1869          |
| policy_entropy     | 2.0107818     |
| policy_loss        | 3.309548e-05  |
| serial_timesteps   | 239232        |
| time_elapsed       | 1.48e+04      |
| total_timesteps    | 239232        |
| value_loss         | 0.0030992352  |
--------------------------------------
---------------------------------------
| approxkl           | 3.2787816e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.64           |
| explained_variance | 0.00257        |
| fps                | 23             |
| n_updates          | 1870           |
| policy_entropy     | 2.0163045      |
| policy_loss        | 0.000111270696 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 2.3343869e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.64           |
| explained_variance | 0.00238        |
| fps                | 11             |
| n_updates          | 1883           |
| policy_entropy     | 2.022405       |
| policy_loss        | -3.2756478e-05 |
| serial_timesteps   | 241024         |
| time_elapsed       | 1.49e+04       |
| total_timesteps    | 241024         |
| value_loss         | 0.006591867    |
---------------------------------------
--------------------------------------
| approxkl           | 9.58678e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | -0.000976     |
| fps                | 24            |
| n_updates          | 1884          |
| policy_entropy     | 2.019301      |
| policy_loss        | -0.0003682766 |

---------------------------------------
| approxkl           | 5.699632e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.62           |
| explained_variance | 0.00458        |
| fps                | 23             |
| n_updates          | 1897           |
| policy_entropy     | 2.0025527      |
| policy_loss        | -9.0314075e-05 |
| serial_timesteps   | 242816         |
| time_elapsed       | 1.5e+04        |
| total_timesteps    | 242816         |
| value_loss         | 0.0023077177   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.2008304e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.62           |
| explained_variance | 0.0164         |
| fps                | 9              |
| n_updates          | 1898           |
| policy_entropy     | 2.0152137      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 5.362623e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.62           |
| explained_variance | -0.00825       |
| fps                | 24             |
| n_updates          | 1911           |
| policy_entropy     | 2.0514383      |
| policy_loss        | -0.00074283406 |
| serial_timesteps   | 244608         |
| time_elapsed       | 1.51e+04       |
| total_timesteps    | 244608         |
| value_loss         | 0.0022308207   |
---------------------------------------
----------------------------------------
| approxkl           | 3.674898e-05    |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.15e+03        |
| ep_reward_mean     | 4.62            |
| explained_variance | 0.000168        |
| fps                | 23              |
| n_updates          | 1912            |
| policy_entropy     | 2.0659456       |
| policy_loss        | -0.00010

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 8.7767825e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.12e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | 0.504         |
| fps                | 8             |
| n_updates          | 1925          |
| policy_entropy     | 1.5864865     |
| policy_loss        | -0.0017505814 |
| serial_timesteps   | 246400        |
| time_elapsed       | 1.52e+04      |
| total_timesteps    | 246400        |
| value_loss         | 0.0045884755  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0001729964  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.12e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | 0.103         |
| fps                | 23            |
| n_updates          | 1926          |
| policy_entropy     | 1.73436

--------------------------------------
| approxkl           | 5.6472985e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.1e+03       |
| ep_reward_mean     | 4.61          |
| explained_variance | 0.046         |
| fps                | 24            |
| n_updates          | 1939          |
| policy_entropy     | 1.7941946     |
| policy_loss        | 0.00043690205 |
| serial_timesteps   | 248192        |
| time_elapsed       | 1.53e+04      |
| total_timesteps    | 248192        |
| value_loss         | 0.0027671976  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 3.8043716e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.1e+03       |
| ep_reward_mean     | 4.61          |
| explained_variance | -0.00252      |
| fps                | 11            |
| n_updates          | 1940          |
| policy_entropy     | 1.8205597     |
| policy_loss        | -0.001067176  |
| serial_times

---------------------------------------
| approxkl           | 6.3738844e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.61           |
| explained_variance | -0.00274       |
| fps                | 24             |
| n_updates          | 1953           |
| policy_entropy     | 1.9756315      |
| policy_loss        | -0.00015387312 |
| serial_timesteps   | 249984         |
| time_elapsed       | 1.55e+04       |
| total_timesteps    | 249984         |
| value_loss         | 0.0045565097   |
---------------------------------------
--------------------------------------
| approxkl           | 6.2556326e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.1e+03       |
| ep_reward_mean     | 4.61          |
| explained_variance | -0.00118      |
| fps                | 24            |
| n_updates          | 1954          |
| policy_entropy     | 1.9973263     |
| policy_loss        | -0.0012712916 |
| serial_t

---------------------------------------
| approxkl           | 5.253254e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.64           |
| explained_variance | -0.000132      |
| fps                | 23             |
| n_updates          | 1967           |
| policy_entropy     | 1.8506929      |
| policy_loss        | -0.00038127415 |
| serial_timesteps   | 251776         |
| time_elapsed       | 1.56e+04       |
| total_timesteps    | 251776         |
| value_loss         | 0.0066277133   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 1.336056e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.1e+03       |
| ep_reward_mean     | 4.64          |
| explained_variance | 0.0046        |
| fps                | 11            |
| n_updates          | 1968          |
| policy_entropy     | 1.849715      |
| policy_loss        | -0.0005366206 |

---------------------------------------
| approxkl           | 7.386676e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.64           |
| explained_variance | 0.00183        |
| fps                | 24             |
| n_updates          | 1981           |
| policy_entropy     | 2.083117       |
| policy_loss        | -0.00049222726 |
| serial_timesteps   | 253568         |
| time_elapsed       | 1.57e+04       |
| total_timesteps    | 253568         |
| value_loss         | 0.0049239015   |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 1.4345869e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.66          |
| explained_variance | 0.0577        |
| fps                | 8             |
| n_updates          | 1982          |
| policy_entrop

Round done
---------------------------------------
| approxkl           | 5.737498e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.09e+03       |
| ep_reward_mean     | 4.66           |
| explained_variance | -0.00104       |
| fps                | 11             |
| n_updates          | 1995           |
| policy_entropy     | 1.9819436      |
| policy_loss        | -0.00011222251 |
| serial_timesteps   | 255360         |
| time_elapsed       | 1.58e+04       |
| total_timesteps    | 255360         |
| value_loss         | 0.0029001648   |
---------------------------------------
--------------------------------------
| approxkl           | 2.9106357e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.66          |
| explained_variance | -0.000171     |
| fps                | 23            |
| n_updates          | 1996          |
| policy_entropy     | 1.9864272     |
| policy_loss        | -9.505078e-05 |

---------------------------------------
| approxkl           | 1.4423049e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.7            |
| explained_variance | -0.0593        |
| fps                | 23             |
| n_updates          | 2009           |
| policy_entropy     | 1.71701        |
| policy_loss        | -0.00067520794 |
| serial_timesteps   | 257152         |
| time_elapsed       | 1.59e+04       |
| total_timesteps    | 257152         |
| value_loss         | 0.0031503174   |
---------------------------------------
---------------------------------------
| approxkl           | 1.4255593e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.1e+03        |
| ep_reward_mean     | 4.7            |
| explained_variance | 0.0322         |
| fps                | 23             |
| n_updates          | 2010           |
| policy_entropy     | 1.9015204      |
| policy_loss        | -0.00047536194 |


--------------------------------------
| approxkl           | 1.4092088e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.1e+03       |
| ep_reward_mean     | 4.7           |
| explained_variance | 0.00027       |
| fps                | 23            |
| n_updates          | 2023          |
| policy_entropy     | 2.0356069     |
| policy_loss        | -0.001061067  |
| serial_timesteps   | 258944        |
| time_elapsed       | 1.6e+04       |
| total_timesteps    | 258944        |
| value_loss         | 0.002903187   |
--------------------------------------
--------------------------------------
| approxkl           | 1.3755807e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.1e+03       |
| ep_reward_mean     | 4.7           |
| explained_variance | 0.00181       |
| fps                | 23            |
| n_updates          | 2024          |
| policy_entropy     | 2.0392885     |
| policy_loss        | 1.8962659e-05 |
| serial_timesteps   | 25

--------------------------------------
| approxkl           | 4.8716513e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | -0.0352       |
| fps                | 23            |
| n_updates          | 2037          |
| policy_entropy     | 1.9786        |
| policy_loss        | -0.001222264  |
| serial_timesteps   | 260736        |
| time_elapsed       | 1.61e+04      |
| total_timesteps    | 260736        |
| value_loss         | 0.0019522753  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 3.469773e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.64           |
| explained_variance | -0.0222        |
| fps                | 11             |
| n_updates          | 2038           |
| policy_entropy     | 2.0368032      |
| policy_loss        | -0.00087052584 |
| se

--------------------------------------
| approxkl           | 4.0193163e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | -0.000232     |
| fps                | 24            |
| n_updates          | 2051          |
| policy_entropy     | 2.1531649     |
| policy_loss        | 5.9951097e-05 |
| serial_timesteps   | 262528        |
| time_elapsed       | 1.62e+04      |
| total_timesteps    | 262528        |
| value_loss         | 0.0051092883  |
--------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 1.020465e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.61           |
| explained_variance | 0.198          |
| fps                | 8              |
| n_updates          | 2052           |
| policy_entropy     |

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 0.00012532606 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.57          |
| explained_variance | 0.136         |
| fps                | 8             |
| n_updates          | 2065          |
| policy_entropy     | 2.028875      |
| policy_loss        | -0.0021350998 |
| serial_timesteps   | 264320        |
| time_elapsed       | 1.63e+04      |
| total_timesteps    | 264320        |
| value_loss         | 0.0061360337  |
--------------------------------------
--------------------------------------
| approxkl           | 8.9366804e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.57          |
| explained_variance | 0.0108        |
| fps                | 24            |
| n_updates          | 2066          |
| policy_entropy     | 2.05701

-------------------------------------
| approxkl           | 5.431494e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.04e+03     |
| ep_reward_mean     | 4.57         |
| explained_variance | 0.00761      |
| fps                | 24           |
| n_updates          | 2079         |
| policy_entropy     | 2.1135902    |
| policy_loss        | 0.0002001673 |
| serial_timesteps   | 266112       |
| time_elapsed       | 1.65e+04     |
| total_timesteps    | 266112       |
| value_loss         | 0.004949356  |
-------------------------------------
Round done
---------------------------------------
| approxkl           | 5.497836e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.57           |
| explained_variance | -0.00136       |
| fps                | 10             |
| n_updates          | 2080           |
| policy_entropy     | 2.1140118      |
| policy_loss        | -0.00026151258 |
| serial_timesteps 

--------------------------------------
| approxkl           | 1.8537752e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.58          |
| explained_variance | -0.001        |
| fps                | 24            |
| n_updates          | 2093          |
| policy_entropy     | 2.0564694     |
| policy_loss        | -0.000649821  |
| serial_timesteps   | 267904        |
| time_elapsed       | 1.66e+04      |
| total_timesteps    | 267904        |
| value_loss         | 0.005027377   |
--------------------------------------
--------------------------------------
| approxkl           | 1.8902523e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.58          |
| explained_variance | 0.000676      |
| fps                | 23            |
| n_updates          | 2094          |
| policy_entropy     | 2.0550601     |
| policy_loss        | -0.0003200397 |
| serial_timesteps   | 26

--------------------------------------
| approxkl           | 6.235695e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.58          |
| explained_variance | 0.00223       |
| fps                | 24            |
| n_updates          | 2107          |
| policy_entropy     | 2.0943115     |
| policy_loss        | -0.0005152654 |
| serial_timesteps   | 269696        |
| time_elapsed       | 1.67e+04      |
| total_timesteps    | 269696        |
| value_loss         | 0.0109184105  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.5850881e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.58          |
| explained_variance | 0.000673      |
| fps                | 11            |
| n_updates          | 2108          |
| policy_entropy     | 2.1031997     |
| policy_loss        | -0.0004358366 |
| serial_times

---------------------------------------
| approxkl           | 2.6515454e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.6            |
| explained_variance | -0.00138       |
| fps                | 23             |
| n_updates          | 2121           |
| policy_entropy     | 2.104891       |
| policy_loss        | -0.00014321972 |
| serial_timesteps   | 271488         |
| time_elapsed       | 1.68e+04       |
| total_timesteps    | 271488         |
| value_loss         | 0.003875176    |
---------------------------------------
--------------------------------------
| approxkl           | 1.0454627e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.6           |
| explained_variance | 0.000487      |
| fps                | 24            |
| n_updates          | 2122          |
| policy_entropy     | 2.1033993     |
| policy_loss        | -0.0007290989 |
| serial_t

Round done
---------------------------------------
| approxkl           | 6.0906377e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.6            |
| explained_variance | -0.00106       |
| fps                | 11             |
| n_updates          | 2135           |
| policy_entropy     | 2.1007357      |
| policy_loss        | -0.00046913745 |
| serial_timesteps   | 273280         |
| time_elapsed       | 1.69e+04       |
| total_timesteps    | 273280         |
| value_loss         | 0.0060245097   |
---------------------------------------
--------------------------------------
| approxkl           | 1.707225e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.6           |
| explained_variance | -0.00438      |
| fps                | 24            |
| n_updates          | 2136          |
| policy_entropy     | 2.0972965     |
| policy_loss        | -0.0008174572 |

Round done
--------------------------------------
| approxkl           | 3.338675e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.6           |
| explained_variance | -0.00208      |
| fps                | 11            |
| n_updates          | 2149          |
| policy_entropy     | 2.164147      |
| policy_loss        | 4.8689544e-06 |
| serial_timesteps   | 275072        |
| time_elapsed       | 1.7e+04       |
| total_timesteps    | 275072        |
| value_loss         | 0.005095711   |
--------------------------------------
---------------------------------------
| approxkl           | 1.6257229e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.6            |
| explained_variance | 0.000395       |
| fps                | 24             |
| n_updates          | 2150           |
| policy_entropy     | 2.1665473      |
| policy_loss        | -4.1713938e-05 |
| se

---------------------------------------
| approxkl           | 8.256548e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.67           |
| explained_variance | 0.00344        |
| fps                | 24             |
| n_updates          | 2163           |
| policy_entropy     | 2.1455956      |
| policy_loss        | -0.00016550347 |
| serial_timesteps   | 276864         |
| time_elapsed       | 1.71e+04       |
| total_timesteps    | 276864         |
| value_loss         | 0.009238056    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 5.189429e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.67           |
| explained_variance | -0.00235       |
| fps                | 11             |
| n_updates          | 2164           |
| policy_entropy     | 2.1518118      |
| policy_loss        | -0.000

Round done
---------------------------------------
| approxkl           | 1.3031043e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.67           |
| explained_variance | -8.42e-05      |
| fps                | 11             |
| n_updates          | 2177           |
| policy_entropy     | 2.1857338      |
| policy_loss        | -0.00043022446 |
| serial_timesteps   | 278656         |
| time_elapsed       | 1.72e+04       |
| total_timesteps    | 278656         |
| value_loss         | 0.009326203    |
---------------------------------------
---------------------------------------
| approxkl           | 9.757059e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.67           |
| explained_variance | -0.0117        |
| fps                | 23             |
| n_updates          | 2178           |
| policy_entropy     | 2.1902652      |
| policy_loss        | -0.000

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 4.5903565e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.71          |
| explained_variance | 0.167         |
| fps                | 8             |
| n_updates          | 2191          |
| policy_entropy     | 2.0378537     |
| policy_loss        | -0.0016137082 |
| serial_timesteps   | 280448        |
| time_elapsed       | 1.74e+04      |
| total_timesteps    | 280448        |
| value_loss         | 0.0058693383  |
--------------------------------------
---------------------------------------
| approxkl           | 1.0556521e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.71           |
| explained_variance | 0.0083         |
| fps                | 23             |
| n_updates          | 2192           |
| policy_entropy     |

Round done
--------------------------------------
| approxkl           | 2.0040114e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.67          |
| explained_variance | -0.0158       |
| fps                | 11            |
| n_updates          | 2205          |
| policy_entropy     | 2.1811461     |
| policy_loss        | 2.8155686e-05 |
| serial_timesteps   | 282240        |
| time_elapsed       | 1.75e+04      |
| total_timesteps    | 282240        |
| value_loss         | 0.006707763   |
--------------------------------------
--------------------------------------
| approxkl           | 2.395447e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.67          |
| explained_variance | -0.00236      |
| fps                | 24            |
| n_updates          | 2206          |
| policy_entropy     | 2.1847925     |
| policy_loss        | -0.0013535526 |
| serial_times

--------------------------------------
| approxkl           | 1.1667427e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.62          |
| explained_variance | -0.0154       |
| fps                | 23            |
| n_updates          | 2219          |
| policy_entropy     | 2.167286      |
| policy_loss        | 0.00010875985 |
| serial_timesteps   | 284032        |
| time_elapsed       | 1.76e+04      |
| total_timesteps    | 284032        |
| value_loss         | 0.004276275   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 5.2405453e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.62           |
| explained_variance | 0.00489        |
| fps                | 11             |
| n_updates          | 2220           |
| policy_entropy     | 2.172698       |
| policy_loss        | -0.00039752573 |
| se

---------------------------------------
| approxkl           | 2.8988436e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.62           |
| explained_variance | -0.00576       |
| fps                | 24             |
| n_updates          | 2233           |
| policy_entropy     | 2.189405       |
| policy_loss        | -0.00023354776 |
| serial_timesteps   | 285824         |
| time_elapsed       | 1.77e+04       |
| total_timesteps    | 285824         |
| value_loss         | 0.0038187501   |
---------------------------------------
--------------------------------------
| approxkl           | 1.7794271e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.62          |
| explained_variance | -0.0011       |
| fps                | 24            |
| n_updates          | 2234          |
| policy_entropy     | 2.1889608     |
| policy_loss        | 5.880464e-05  |
| serial_t

Round done
--------------------------------------
| approxkl           | 5.5789624e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.62          |
| explained_variance | -0.00153      |
| fps                | 10            |
| n_updates          | 2247          |
| policy_entropy     | 2.1921818     |
| policy_loss        | 0.00016401522 |
| serial_timesteps   | 287616        |
| time_elapsed       | 1.78e+04      |
| total_timesteps    | 287616        |
| value_loss         | 0.0013093908  |
--------------------------------------
--------------------------------------
| approxkl           | 3.822746e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.62          |
| explained_variance | 0.000517      |
| fps                | 23            |
| n_updates          | 2248          |
| policy_entropy     | 2.1913495     |
| policy_loss        | 6.3441694e-06 |
| serial_times

--------------------------------------
| approxkl           | 7.440134e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | 0.00362       |
| fps                | 24            |
| n_updates          | 2261          |
| policy_entropy     | 2.183556      |
| policy_loss        | -0.0006667003 |
| serial_timesteps   | 289408        |
| time_elapsed       | 1.79e+04      |
| total_timesteps    | 289408        |
| value_loss         | 0.00227932    |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.3881691e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | -0.00422      |
| fps                | 9             |
| n_updates          | 2262          |
| policy_entropy     | 2.1931996     |
| policy_loss        | -0.0004161708 |
| serial_times

---------------------------------------
| approxkl           | 4.6802797e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.63           |
| explained_variance | 0.000425       |
| fps                | 23             |
| n_updates          | 2275           |
| policy_entropy     | 2.206179       |
| policy_loss        | -0.00034595001 |
| serial_timesteps   | 291200         |
| time_elapsed       | 1.8e+04        |
| total_timesteps    | 291200         |
| value_loss         | 0.009555545    |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 3.010673e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | -0.00338      |
| fps                | 8             |
| n_updates          | 2276          |
| policy_entrop

---------------------------------------
| approxkl           | 2.029813e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.63           |
| explained_variance | -0.000475      |
| fps                | 23             |
| n_updates          | 2289           |
| policy_entropy     | 2.172399       |
| policy_loss        | -0.00031355023 |
| serial_timesteps   | 292992         |
| time_elapsed       | 1.81e+04       |
| total_timesteps    | 292992         |
| value_loss         | 0.0031565113   |
---------------------------------------
---------------------------------------
| approxkl           | 4.204917e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.63           |
| explained_variance | 0.000588       |
| fps                | 23             |
| n_updates          | 2290           |
| policy_entropy     | 2.171619       |
| policy_loss        | -0.00012575649 |


Round done
--------------------------------------
| approxkl           | 2.2830757e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | -0.0016       |
| fps                | 10            |
| n_updates          | 2303          |
| policy_entropy     | 2.2102056     |
| policy_loss        | 3.901217e-05  |
| serial_timesteps   | 294784        |
| time_elapsed       | 1.82e+04      |
| total_timesteps    | 294784        |
| value_loss         | 0.0031724458  |
--------------------------------------
---------------------------------------
| approxkl           | 2.7641013e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.63           |
| explained_variance | 0.0012         |
| fps                | 24             |
| n_updates          | 2304           |
| policy_entropy     | 2.2086055      |
| policy_loss        | -0.00020185113 |
| se

Stage done
----------------------------------------
| approxkl           | 1.9927684e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.07e+03        |
| ep_reward_mean     | 4.62            |
| explained_variance | 0.0005          |
| fps                | 9               |
| n_updates          | 2317            |
| policy_entropy     | 2.128878        |
| policy_loss        | -0.000105109066 |
| serial_timesteps   | 296576          |
| time_elapsed       | 1.83e+04        |
| total_timesteps    | 296576          |
| value_loss         | 0.004066658     |
----------------------------------------
---------------------------------------
| approxkl           | 1.0117494e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.62           |
| explained_variance | 0.0013         |
| fps                | 24             |
| n_updates          | 2318           |
| policy_entropy     | 2.1297007      |
| policy_loss 

--------------------------------------
| approxkl           | 0.00014386042 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | 0.0188        |
| fps                | 24            |
| n_updates          | 2331          |
| policy_entropy     | 1.8727186     |
| policy_loss        | 4.369393e-05  |
| serial_timesteps   | 298368        |
| time_elapsed       | 1.84e+04      |
| total_timesteps    | 298368        |
| value_loss         | 0.0041548805  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 7.368058e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | -0.000118     |
| fps                | 11            |
| n_updates          | 2332          |
| policy_entropy     | 1.8848774     |
| policy_loss        | -0.0009444086 |
| serial_times

Stage done
--------------------------------------
| approxkl           | 1.82477e-05   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | -0.00292      |
| fps                | 9             |
| n_updates          | 2345          |
| policy_entropy     | 1.8494985     |
| policy_loss        | -0.0005722176 |
| serial_timesteps   | 300160        |
| time_elapsed       | 1.85e+04      |
| total_timesteps    | 300160        |
| value_loss         | 0.0052637234  |
--------------------------------------
--------------------------------------
| approxkl           | 2.2148126e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | 0.003         |
| fps                | 24            |
| n_updates          | 2346          |
| policy_entropy     | 1.8519969     |
| policy_loss        | -0.0005932981 |
| serial_times

--------------------------------------
| approxkl           | 0.00020843676 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | -0.00347      |
| fps                | 23            |
| n_updates          | 2359          |
| policy_entropy     | 1.4689598     |
| policy_loss        | -0.0011272482 |
| serial_timesteps   | 301952        |
| time_elapsed       | 1.87e+04      |
| total_timesteps    | 301952        |
| value_loss         | 0.0034618615  |
--------------------------------------
--------------------------------------
| approxkl           | 2.5948066e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | -0.00426      |
| fps                | 24            |
| n_updates          | 2360          |
| policy_entropy     | 1.5463161     |
| policy_loss        | 0.00073776394 |
| serial_timesteps   | 30

--------------------------------------
| approxkl           | 2.6331221e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | 0.00537       |
| fps                | 24            |
| n_updates          | 2373          |
| policy_entropy     | 1.6260463     |
| policy_loss        | 0.00024077669 |
| serial_timesteps   | 303744        |
| time_elapsed       | 1.88e+04      |
| total_timesteps    | 303744        |
| value_loss         | 0.002337404   |
--------------------------------------
--------------------------------------
| approxkl           | 5.2940268e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.64          |
| explained_variance | 0.00461       |
| fps                | 23            |
| n_updates          | 2374          |
| policy_entropy     | 1.6182269     |
| policy_loss        | -0.0018389216 |
| serial_timesteps   | 30

--------------------------------------
| approxkl           | 0.00021239498 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | -0.00212      |
| fps                | 24            |
| n_updates          | 2387          |
| policy_entropy     | 1.4085        |
| policy_loss        | 7.057376e-05  |
| serial_timesteps   | 305536        |
| time_elapsed       | 1.89e+04      |
| total_timesteps    | 305536        |
| value_loss         | 0.008903769   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 0.00018574414 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.63          |
| explained_variance | -0.00324      |
| fps                | 11            |
| n_updates          | 2388          |
| policy_entropy     | 1.4853704     |
| policy_loss        | -0.0018172264 |
| serial_times

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 2.490411e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.61          |
| explained_variance | 0.195         |
| fps                | 8             |
| n_updates          | 2401          |
| policy_entropy     | 1.5182216     |
| policy_loss        | -0.0010592407 |
| serial_timesteps   | 307328        |
| time_elapsed       | 1.9e+04       |
| total_timesteps    | 307328        |
| value_loss         | 0.0048109987  |
--------------------------------------
--------------------------------------
| approxkl           | 8.809034e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.61          |
| explained_variance | 0.0174        |
| fps                | 24            |
| n_updates          | 2402          |
| policy_entropy     | 1.57200

Stage done
--------------------------------------
| approxkl           | 1.4121292e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.61          |
| explained_variance | -0.000512     |
| fps                | 10            |
| n_updates          | 2415          |
| policy_entropy     | 1.6219605     |
| policy_loss        | 7.441919e-05  |
| serial_timesteps   | 309120        |
| time_elapsed       | 1.91e+04      |
| total_timesteps    | 309120        |
| value_loss         | 0.0026981577  |
--------------------------------------
--------------------------------------
| approxkl           | 7.1845425e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.61          |
| explained_variance | -0.000484     |
| fps                | 23            |
| n_updates          | 2416          |
| policy_entropy     | 1.6396334     |
| policy_loss        | -0.0005705571 |
| serial_times

Stage done
---------------------------------------
| approxkl           | 1.6502958e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.57           |
| explained_variance | -0.00772       |
| fps                | 9              |
| n_updates          | 2429           |
| policy_entropy     | 1.7399411      |
| policy_loss        | -0.00037874095 |
| serial_timesteps   | 310912         |
| time_elapsed       | 1.92e+04       |
| total_timesteps    | 310912         |
| value_loss         | 0.01037702     |
---------------------------------------
--------------------------------------
| approxkl           | 1.2858644e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.57          |
| explained_variance | 0.0106        |
| fps                | 23            |
| n_updates          | 2430          |
| policy_entropy     | 1.757404      |
| policy_loss        | -0.0003315967 |

---------------------------------------
| approxkl           | 2.5134766e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.56           |
| explained_variance | 0.0217         |
| fps                | 24             |
| n_updates          | 2443           |
| policy_entropy     | 1.946557       |
| policy_loss        | -0.00059414655 |
| serial_timesteps   | 312704         |
| time_elapsed       | 1.93e+04       |
| total_timesteps    | 312704         |
| value_loss         | 0.0048378874   |
---------------------------------------
---------------------------------------
| approxkl           | 2.3196988e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.56           |
| explained_variance | -0.0198        |
| fps                | 24             |
| n_updates          | 2444           |
| policy_entropy     | 1.9682822      |
| policy_loss        | -0.00045369565 |


---------------------------------------
| approxkl           | 6.822009e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.52           |
| explained_variance | -0.00579       |
| fps                | 24             |
| n_updates          | 2457           |
| policy_entropy     | 1.9738957      |
| policy_loss        | -0.00023601158 |
| serial_timesteps   | 314496         |
| time_elapsed       | 1.94e+04       |
| total_timesteps    | 314496         |
| value_loss         | 0.0074136555   |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.2609058e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.52          |
| explained_variance | 0.0048        |
| fps                | 9             |
| n_updates          | 2458          |
| policy_entropy     | 1.972193      |
| policy_loss        | -0.0012317225 |

--------------------------------------
| approxkl           | 2.941316e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.52          |
| explained_variance | -0.000697     |
| fps                | 24            |
| n_updates          | 2471          |
| policy_entropy     | 1.9699234     |
| policy_loss        | -0.0005902862 |
| serial_timesteps   | 316288        |
| time_elapsed       | 1.95e+04      |
| total_timesteps    | 316288        |
| value_loss         | 0.002489213   |
--------------------------------------
--------------------------------------
| approxkl           | 2.3281971e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.52          |
| explained_variance | -0.00204      |
| fps                | 24            |
| n_updates          | 2472          |
| policy_entropy     | 1.984587      |
| policy_loss        | -0.0005518012 |
| serial_timesteps   | 31

---------------------------------------
| approxkl           | 1.12662565e-05 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.51           |
| explained_variance | -0.000884      |
| fps                | 24             |
| n_updates          | 2485           |
| policy_entropy     | 2.029148       |
| policy_loss        | -0.0003338959  |
| serial_timesteps   | 318080         |
| time_elapsed       | 1.97e+04       |
| total_timesteps    | 318080         |
| value_loss         | 0.005478757    |
---------------------------------------
---------------------------------------
| approxkl           | 8.722568e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.51           |
| explained_variance | 0.00449        |
| fps                | 24             |
| n_updates          | 2486           |
| policy_entropy     | 2.0408268      |
| policy_loss        | -0.00026870426 |


Stage done
--------------------------------------
| approxkl           | 4.922381e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.49          |
| explained_variance | -0.00163      |
| fps                | 9             |
| n_updates          | 2499          |
| policy_entropy     | 2.0190432     |
| policy_loss        | -0.0010219552 |
| serial_timesteps   | 319872        |
| time_elapsed       | 1.98e+04      |
| total_timesteps    | 319872        |
| value_loss         | 0.0053932946  |
--------------------------------------
--------------------------------------
| approxkl           | 2.7936847e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.49          |
| explained_variance | 0.000602      |
| fps                | 24            |
| n_updates          | 2500          |
| policy_entropy     | 2.010057      |
| policy_loss        | 0.00028769672 |
| serial_times

--------------------------------------
| approxkl           | 5.560494e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.49          |
| explained_variance | -0.00137      |
| fps                | 24            |
| n_updates          | 2513          |
| policy_entropy     | 1.9894128     |
| policy_loss        | -2.390705e-05 |
| serial_timesteps   | 321664        |
| time_elapsed       | 1.99e+04      |
| total_timesteps    | 321664        |
| value_loss         | 0.011332581   |
--------------------------------------
--------------------------------------
| approxkl           | 5.336164e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.49          |
| explained_variance | 0.000564      |
| fps                | 24            |
| n_updates          | 2514          |
| policy_entropy     | 1.9982942     |
| policy_loss        | -0.0001583118 |
| serial_timesteps   | 32

---------------------------------------
| approxkl           | 1.6039685e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.46           |
| explained_variance | 0.0107         |
| fps                | 23             |
| n_updates          | 2527           |
| policy_entropy     | 1.9295077      |
| policy_loss        | -0.00040499493 |
| serial_timesteps   | 323456         |
| time_elapsed       | 2e+04          |
| total_timesteps    | 323456         |
| value_loss         | 0.0076624155   |
---------------------------------------
---------------------------------------
| approxkl           | 1.3174691e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.46           |
| explained_variance | -0.00928       |
| fps                | 24             |
| n_updates          | 2528           |
| policy_entropy     | 1.9587822      |
| policy_loss        | -2.0932406e-05 |


Round done
---------------------------------------
| approxkl           | 4.3011096e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.46           |
| explained_variance | 0.006          |
| fps                | 11             |
| n_updates          | 2541           |
| policy_entropy     | 2.0525098      |
| policy_loss        | -0.00022160634 |
| serial_timesteps   | 325248         |
| time_elapsed       | 2.01e+04       |
| total_timesteps    | 325248         |
| value_loss         | 0.0036642125   |
---------------------------------------
---------------------------------------
| approxkl           | 6.5661407e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.46           |
| explained_variance | -0.00241       |
| fps                | 23             |
| n_updates          | 2542           |
| policy_entropy     | 2.0629888      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 5.777755e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.5           |
| explained_variance | 0.00863       |
| fps                | 24            |
| n_updates          | 2555          |
| policy_entropy     | 2.091773      |
| policy_loss        | -8.300692e-05 |
| serial_timesteps   | 327040        |
| time_elapsed       | 2.02e+04      |
| total_timesteps    | 327040        |
| value_loss         | 0.0015401734  |
--------------------------------------
--------------------------------------
| approxkl           | 1.3781179e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.5           |
| explained_variance | -0.00959      |
| fps                | 24            |
| n_updates          | 2556          |
| policy_entropy     | 2.0926704     |
| policy_loss        | -0.0005703697 |
| serial_timesteps   | 32

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 5.3590367e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.49          |
| explained_variance | 0.463         |
| fps                | 8             |
| n_updates          | 2569          |
| policy_entropy     | 1.8447635     |
| policy_loss        | -0.0020167148 |
| serial_timesteps   | 328832        |
| time_elapsed       | 2.03e+04      |
| total_timesteps    | 328832        |
| value_loss         | 0.0021808269  |
--------------------------------------
---------------------------------------
| approxkl           | 4.771819e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.49           |
| explained_variance | -0.0261        |
| fps                | 24             |
| n_updates          | 2570           |
| policy_entropy     |

---------------------------------------
| approxkl           | 9.967595e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.49           |
| explained_variance | -0.00889       |
| fps                | 24             |
| n_updates          | 2583           |
| policy_entropy     | 2.065367       |
| policy_loss        | -0.00044501945 |
| serial_timesteps   | 330624         |
| time_elapsed       | 2.04e+04       |
| total_timesteps    | 330624         |
| value_loss         | 0.0021064824   |
---------------------------------------
---------------------------------------
| approxkl           | 1.36249455e-05 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.49           |
| explained_variance | -0.000252      |
| fps                | 24             |
| n_updates          | 2584           |
| policy_entropy     | 2.0631342      |
| policy_loss        | -0.00010061718 |


---------------------------------------
| approxkl           | 1.85871e-05    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.46           |
| explained_variance | -8.69e-05      |
| fps                | 24             |
| n_updates          | 2597           |
| policy_entropy     | 1.8837073      |
| policy_loss        | -0.00052836165 |
| serial_timesteps   | 332416         |
| time_elapsed       | 2.05e+04       |
| total_timesteps    | 332416         |
| value_loss         | 0.0073281964   |
---------------------------------------
---------------------------------------
| approxkl           | 1.2692226e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.46           |
| explained_variance | -0.00406       |
| fps                | 23             |
| n_updates          | 2598           |
| policy_entropy     | 1.9110042      |
| policy_loss        | -0.00018278696 |


--------------------------------------
| approxkl           | 8.588736e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.46          |
| explained_variance | -0.000129     |
| fps                | 23            |
| n_updates          | 2611          |
| policy_entropy     | 1.9692302     |
| policy_loss        | 3.9432198e-05 |
| serial_timesteps   | 334208        |
| time_elapsed       | 2.06e+04      |
| total_timesteps    | 334208        |
| value_loss         | 0.0031094793  |
--------------------------------------
---------------------------------------
| approxkl           | 4.765149e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.46           |
| explained_variance | -0.000903      |
| fps                | 23             |
| n_updates          | 2612           |
| policy_entropy     | 1.9699082      |
| policy_loss        | -0.00047101825 |
| serial_timest

--------------------------------------
| approxkl           | 4.3302307e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.35          |
| explained_variance | -0.00352      |
| fps                | 24            |
| n_updates          | 2625          |
| policy_entropy     | 1.9440943     |
| policy_loss        | -0.0003414424 |
| serial_timesteps   | 336000        |
| time_elapsed       | 2.07e+04      |
| total_timesteps    | 336000        |
| value_loss         | 0.0054919617  |
--------------------------------------
--------------------------------------
| approxkl           | 1.0142956e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.35          |
| explained_variance | -0.00491      |
| fps                | 24            |
| n_updates          | 2626          |
| policy_entropy     | 1.951292      |
| policy_loss        | -0.0004812088 |
| serial_timesteps   | 33

---------------------------------------
| approxkl           | 7.8514995e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.35           |
| explained_variance | -0.00399       |
| fps                | 23             |
| n_updates          | 2639           |
| policy_entropy     | 1.9761659      |
| policy_loss        | -0.00064893067 |
| serial_timesteps   | 337792         |
| time_elapsed       | 2.08e+04       |
| total_timesteps    | 337792         |
| value_loss         | 0.0036398007   |
---------------------------------------
--------------------------------------
| approxkl           | 4.476996e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.35          |
| explained_variance | 0.000823      |
| fps                | 24            |
| n_updates          | 2640          |
| policy_entropy     | 1.9825451     |
| policy_loss        | 0.00015660748 |
| serial_t

Round done
---------------------------------------
| approxkl           | 5.224918e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.31           |
| explained_variance | -0.00258       |
| fps                | 11             |
| n_updates          | 2653           |
| policy_entropy     | 1.832974       |
| policy_loss        | -0.00017249212 |
| serial_timesteps   | 339584         |
| time_elapsed       | 2.09e+04       |
| total_timesteps    | 339584         |
| value_loss         | 0.00357329     |
---------------------------------------
---------------------------------------
| approxkl           | 4.6986715e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.31           |
| explained_variance | -0.000451      |
| fps                | 23             |
| n_updates          | 2654           |
| policy_entropy     | 1.8394567      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 7.3210786e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.31          |
| explained_variance | 0.00231       |
| fps                | 24            |
| n_updates          | 2667          |
| policy_entropy     | 1.94683       |
| policy_loss        | -8.118898e-05 |
| serial_timesteps   | 341376        |
| time_elapsed       | 2.1e+04       |
| total_timesteps    | 341376        |
| value_loss         | 0.0031541118  |
--------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 2.5721649e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 4.34          |
| explained_variance | 0.353         |
| fps                | 8             |
| n_updates          | 2668          |
| policy_entropy     | 1.80678

-------------------------------------
| approxkl           | 5.452923e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.01e+03     |
| ep_reward_mean     | 4.34         |
| explained_variance | -0.00138     |
| fps                | 23           |
| n_updates          | 2681         |
| policy_entropy     | 1.992219     |
| policy_loss        | -3.57572e-05 |
| serial_timesteps   | 343168       |
| time_elapsed       | 2.11e+04     |
| total_timesteps    | 343168       |
| value_loss         | 0.0036181686 |
-------------------------------------
--------------------------------------
| approxkl           | 6.5033255e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 4.34          |
| explained_variance | 0.00363       |
| fps                | 23            |
| n_updates          | 2682          |
| policy_entropy     | 1.9933474     |
| policy_loss        | -0.000174582  |
| serial_timesteps   | 343296        |
|

---------------------------------------
| approxkl           | 5.708225e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.01e+03       |
| ep_reward_mean     | 4.34           |
| explained_variance | 0.000743       |
| fps                | 23             |
| n_updates          | 2695           |
| policy_entropy     | 1.9824787      |
| policy_loss        | -0.00011338666 |
| serial_timesteps   | 344960         |
| time_elapsed       | 2.13e+04       |
| total_timesteps    | 344960         |
| value_loss         | 0.0044798595   |
---------------------------------------
--------------------------------------
| approxkl           | 1.4707127e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 4.34          |
| explained_variance | 0.00227       |
| fps                | 24            |
| n_updates          | 2696          |
| policy_entropy     | 1.9849876     |
| policy_loss        | -0.0008013919 |
| serial_t

---------------------------------------
| approxkl           | 5.002451e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.3            |
| explained_variance | -0.00288       |
| fps                | 24             |
| n_updates          | 2709           |
| policy_entropy     | 1.8590691      |
| policy_loss        | -1.6327947e-05 |
| serial_timesteps   | 346752         |
| time_elapsed       | 2.14e+04       |
| total_timesteps    | 346752         |
| value_loss         | 0.009904291    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.7285736e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.3            |
| explained_variance | 0.00174        |
| fps                | 11             |
| n_updates          | 2710           |
| policy_entropy     | 1.8607688      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 2.1695067e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.26          |
| explained_variance | -0.00203      |
| fps                | 23            |
| n_updates          | 2723          |
| policy_entropy     | 1.8055764     |
| policy_loss        | -0.0008122958 |
| serial_timesteps   | 348544        |
| time_elapsed       | 2.15e+04      |
| total_timesteps    | 348544        |
| value_loss         | 0.0048084604  |
--------------------------------------
---------------------------------------
| approxkl           | 3.5518984e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.26           |
| explained_variance | -0.00409       |
| fps                | 23             |
| n_updates          | 2724           |
| policy_entropy     | 1.8195314      |
| policy_loss        | -0.00069879554 |
| serial_timest

--------------------------------------
| approxkl           | 5.049588e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.23          |
| explained_variance | -0.00197      |
| fps                | 24            |
| n_updates          | 2737          |
| policy_entropy     | 1.8599627     |
| policy_loss        | -0.0002464361 |
| serial_timesteps   | 350336        |
| time_elapsed       | 2.16e+04      |
| total_timesteps    | 350336        |
| value_loss         | 0.0012190958  |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.9960084e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.23          |
| explained_variance | -0.00158      |
| fps                | 9             |
| n_updates          | 2738          |
| policy_entropy     | 1.8686295     |
| policy_loss        | 0.00024647638 |
| serial_times

Round done
--------------------------------------
| approxkl           | 1.5529242e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.23          |
| explained_variance | 0.00353       |
| fps                | 11            |
| n_updates          | 2751          |
| policy_entropy     | 1.9608977     |
| policy_loss        | -0.0003136536 |
| serial_timesteps   | 352128        |
| time_elapsed       | 2.17e+04      |
| total_timesteps    | 352128        |
| value_loss         | 0.012380183   |
--------------------------------------
--------------------------------------
| approxkl           | 2.4305244e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.23          |
| explained_variance | 0.000294      |
| fps                | 24            |
| n_updates          | 2752          |
| policy_entropy     | 1.9547267     |
| policy_loss        | -0.0012709443 |
| serial_times

--------------------------------------
| approxkl           | 0.00011268508 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 4.23          |
| explained_variance | -0.000533     |
| fps                | 24            |
| n_updates          | 2765          |
| policy_entropy     | 1.8507966     |
| policy_loss        | -0.0029939376 |
| serial_timesteps   | 353920        |
| time_elapsed       | 2.18e+04      |
| total_timesteps    | 353920        |
| value_loss         | 0.0034381556  |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 0.00012085212  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.01e+03       |
| ep_reward_mean     | 4.23           |
| explained_variance | -0.000812      |
| fps                | 9              |
| n_updates          | 2766           |
| policy_entropy     | 1.849893       |
| policy_loss        | -0.00031381473 |
| se

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 0.00010901454 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3e+03         |
| ep_reward_mean     | 4.23          |
| explained_variance | 0.272         |
| fps                | 8             |
| n_updates          | 2779          |
| policy_entropy     | 1.790716      |
| policy_loss        | -0.003492985  |
| serial_timesteps   | 355712        |
| time_elapsed       | 2.19e+04      |
| total_timesteps    | 355712        |
| value_loss         | 0.003497772   |
--------------------------------------
---------------------------------------
| approxkl           | 0.00017373935  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.23           |
| explained_variance | -0.0657        |
| fps                | 24             |
| n_updates          | 2780           |
| policy_entropy     |

--------------------------------------
| approxkl           | 2.1440925e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3e+03         |
| ep_reward_mean     | 4.23          |
| explained_variance | 0.000788      |
| fps                | 24            |
| n_updates          | 2793          |
| policy_entropy     | 1.9734204     |
| policy_loss        | -0.0008091042 |
| serial_timesteps   | 357504        |
| time_elapsed       | 2.2e+04       |
| total_timesteps    | 357504        |
| value_loss         | 0.0036915415  |
--------------------------------------
--------------------------------------
| approxkl           | 1.8230347e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3e+03         |
| ep_reward_mean     | 4.23          |
| explained_variance | 0.00259       |
| fps                | 23            |
| n_updates          | 2794          |
| policy_entropy     | 1.9736135     |
| policy_loss        | -0.0003514886 |
| serial_timesteps   | 35

Round done
---------------------------------------
| approxkl           | 1.877955e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.23           |
| explained_variance | 0.000564       |
| fps                | 11             |
| n_updates          | 2807           |
| policy_entropy     | 1.9796146      |
| policy_loss        | -0.00048630685 |
| serial_timesteps   | 359296         |
| time_elapsed       | 2.21e+04       |
| total_timesteps    | 359296         |
| value_loss         | 0.007598263    |
---------------------------------------
---------------------------------------
| approxkl           | 7.938661e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.23           |
| explained_variance | 5.75e-05       |
| fps                | 24             |
| n_updates          | 2808           |
| policy_entropy     | 1.9706521      |
| policy_loss        | -1.138

--------------------------------------
| approxkl           | 4.353794e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.28          |
| explained_variance | 0.00226       |
| fps                | 24            |
| n_updates          | 2821          |
| policy_entropy     | 1.9318535     |
| policy_loss        | -8.766912e-05 |
| serial_timesteps   | 361088        |
| time_elapsed       | 2.22e+04      |
| total_timesteps    | 361088        |
| value_loss         | 0.0023784977  |
--------------------------------------
Round done
-------------------------------------
| approxkl           | 4.212088e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.02e+03     |
| ep_reward_mean     | 4.28         |
| explained_variance | 0.00172      |
| fps                | 11           |
| n_updates          | 2822         |
| policy_entropy     | 1.9369642    |
| policy_loss        | -0.000176806 |
| serial_timesteps   | 3

--------------------------------------
| approxkl           | 3.931451e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3e+03         |
| ep_reward_mean     | 4.21          |
| explained_variance | -0.00232      |
| fps                | 24            |
| n_updates          | 2835          |
| policy_entropy     | 1.91433       |
| policy_loss        | -0.0009145364 |
| serial_timesteps   | 362880        |
| time_elapsed       | 2.23e+04      |
| total_timesteps    | 362880        |
| value_loss         | 0.002062338   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 2.1564974e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.21           |
| explained_variance | -0.00157       |
| fps                | 11             |
| n_updates          | 2836           |
| policy_entropy     | 1.9099065      |
| policy_loss        | -0.00025629625 |
| se

Round done
---------------------------------------
| approxkl           | 6.8929885e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.000896       |
| fps                | 10             |
| n_updates          | 2849           |
| policy_entropy     | 1.8587798      |
| policy_loss        | -0.00026010722 |
| serial_timesteps   | 364672         |
| time_elapsed       | 2.24e+04       |
| total_timesteps    | 364672         |
| value_loss         | 0.0042907773   |
---------------------------------------
---------------------------------------
| approxkl           | 2.6295274e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.21           |
| explained_variance | -0.000105      |
| fps                | 23             |
| n_updates          | 2850           |
| policy_entropy     | 1.8571932      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 1.2748642e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.21           |
| explained_variance | -0.000229      |
| fps                | 23             |
| n_updates          | 2863           |
| policy_entropy     | 1.932873       |
| policy_loss        | -0.00019534305 |
| serial_timesteps   | 366464         |
| time_elapsed       | 2.26e+04       |
| total_timesteps    | 366464         |
| value_loss         | 0.005067621    |
---------------------------------------
--------------------------------------
| approxkl           | 1.5564561e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3e+03         |
| ep_reward_mean     | 4.21          |
| explained_variance | 4.58e-05      |
| fps                | 24            |
| n_updates          | 2864          |
| policy_entropy     | 1.9388773     |
| policy_loss        | -0.001342833  |
| serial_t

Stage done
---------------------------------------
| approxkl           | 9.690755e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.000742       |
| fps                | 9              |
| n_updates          | 2877           |
| policy_entropy     | 2.030474       |
| policy_loss        | -0.00031853467 |
| serial_timesteps   | 368256         |
| time_elapsed       | 2.27e+04       |
| total_timesteps    | 368256         |
| value_loss         | 0.010433032    |
---------------------------------------
--------------------------------------
| approxkl           | 4.7509284e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3e+03         |
| ep_reward_mean     | 4.21          |
| explained_variance | 0.00236       |
| fps                | 24            |
| n_updates          | 2878          |
| policy_entropy     | 2.030714      |
| policy_loss        | 4.723668e-05  |

--------------------------------------
| approxkl           | 2.1020998e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | -0.00107      |
| fps                | 24            |
| n_updates          | 2891          |
| policy_entropy     | 1.9702437     |
| policy_loss        | -0.0010340003 |
| serial_timesteps   | 370048        |
| time_elapsed       | 2.28e+04      |
| total_timesteps    | 370048        |
| value_loss         | 0.004592837   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 3.5064513e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | -0.000793     |
| fps                | 11            |
| n_updates          | 2892          |
| policy_entropy     | 1.9644735     |
| policy_loss        | -0.0003634384 |
| serial_times

Round done
---------------------------------------
| approxkl           | 2.8905856e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00115       |
| fps                | 12             |
| n_updates          | 2905           |
| policy_entropy     | 2.0019271      |
| policy_loss        | -0.00020671077 |
| serial_timesteps   | 371840         |
| time_elapsed       | 2.29e+04       |
| total_timesteps    | 371840         |
| value_loss         | 0.0037408038   |
---------------------------------------
---------------------------------------
| approxkl           | 2.8935608e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00466       |
| fps                | 24             |
| n_updates          | 2906           |
| policy_entropy     | 2.0026047      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 3.3918084e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | 0.0285        |
| fps                | 24            |
| n_updates          | 2919          |
| policy_entropy     | 1.9065778     |
| policy_loss        | -0.001457423  |
| serial_timesteps   | 373632        |
| time_elapsed       | 2.3e+04       |
| total_timesteps    | 373632        |
| value_loss         | 0.006790894   |
--------------------------------------
--------------------------------------
| approxkl           | 2.8990213e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | 0.023         |
| fps                | 24            |
| n_updates          | 2920          |
| policy_entropy     | 1.9282523     |
| policy_loss        | 0.00010774657 |
| serial_timesteps   | 37

---------------------------------------
| approxkl           | 5.5600253e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.000594      |
| fps                | 24             |
| n_updates          | 2933           |
| policy_entropy     | 1.9891152      |
| policy_loss        | -1.8071383e-05 |
| serial_timesteps   | 375424         |
| time_elapsed       | 2.31e+04       |
| total_timesteps    | 375424         |
| value_loss         | 0.00485359     |
---------------------------------------
--------------------------------------
| approxkl           | 2.9966875e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | 0.000473      |
| fps                | 24            |
| n_updates          | 2934          |
| policy_entropy     | 1.9875406     |
| policy_loss        | -8.354639e-05 |
| serial_t

---------------------------------------
| approxkl           | 1.8875766e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | 0.00749        |
| fps                | 24             |
| n_updates          | 2947           |
| policy_entropy     | 1.885519       |
| policy_loss        | -0.00051681604 |
| serial_timesteps   | 377216         |
| time_elapsed       | 2.32e+04       |
| total_timesteps    | 377216         |
| value_loss         | 0.0044118143   |
---------------------------------------
--------------------------------------
| approxkl           | 2.0957783e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | 0.0369        |
| fps                | 24            |
| n_updates          | 2948          |
| policy_entropy     | 1.9509335     |
| policy_loss        | -0.0009854324 |
| serial_t

---------------------------------------
| approxkl           | 8.540878e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.000552      |
| fps                | 24             |
| n_updates          | 2961           |
| policy_entropy     | 2.041575       |
| policy_loss        | -0.00061612576 |
| serial_timesteps   | 379008         |
| time_elapsed       | 2.33e+04       |
| total_timesteps    | 379008         |
| value_loss         | 0.0035881214   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 1.2856536e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | 0.00155       |
| fps                | 12            |
| n_updates          | 2962          |
| policy_entropy     | 2.0434842     |
| policy_loss        | -0.0007670354 |

---------------------------------------
| approxkl           | 2.5163932e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.00168       |
| fps                | 23             |
| n_updates          | 2975           |
| policy_entropy     | 2.101262       |
| policy_loss        | -0.00053776056 |
| serial_timesteps   | 380800         |
| time_elapsed       | 2.34e+04       |
| total_timesteps    | 380800         |
| value_loss         | 0.0034370641   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.8095565e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | 0.00115        |
| fps                | 11             |
| n_updates          | 2976           |
| policy_entropy     | 2.1131084      |
| policy_loss        | -0.000

Round done
--------------------------------------
| approxkl           | 1.5384037e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | -0.00238      |
| fps                | 11            |
| n_updates          | 2989          |
| policy_entropy     | 2.1054523     |
| policy_loss        | -0.0005059317 |
| serial_timesteps   | 382592        |
| time_elapsed       | 2.36e+04      |
| total_timesteps    | 382592        |
| value_loss         | 0.0050991997  |
--------------------------------------
---------------------------------------
| approxkl           | 2.026448e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.000308      |
| fps                | 24             |
| n_updates          | 2990           |
| policy_entropy     | 2.103876       |
| policy_loss        | -0.00036375225 |
| se

---------------------------------------
| approxkl           | 1.6373206e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.05           |
| explained_variance | 0.052          |
| fps                | 24             |
| n_updates          | 3003           |
| policy_entropy     | 1.991159       |
| policy_loss        | -0.00043252483 |
| serial_timesteps   | 384384         |
| time_elapsed       | 2.37e+04       |
| total_timesteps    | 384384         |
| value_loss         | 0.003184115    |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 1.6882674e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | 0.00436       |
| fps                | 10            |
| n_updates          | 3004          |
| policy_entropy     | 2.0060859     |
| policy_loss        | -0.0005862005 |

---------------------------------------
| approxkl           | 6.4360365e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.05           |
| explained_variance | -2.28e-05      |
| fps                | 24             |
| n_updates          | 3017           |
| policy_entropy     | 2.0591128      |
| policy_loss        | -0.00018241443 |
| serial_timesteps   | 386176         |
| time_elapsed       | 2.38e+04       |
| total_timesteps    | 386176         |
| value_loss         | 0.00443299     |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 4.485603e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | 0.000894      |
| fps                | 11            |
| n_updates          | 3018          |
| policy_entropy     | 2.0639813     |
| policy_loss        | -0.0002589915 |

--------------------------------------
| approxkl           | 1.4003807e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | 0.00201       |
| fps                | 24            |
| n_updates          | 3031          |
| policy_entropy     | 1.9980129     |
| policy_loss        | 1.9634143e-05 |
| serial_timesteps   | 387968        |
| time_elapsed       | 2.39e+04      |
| total_timesteps    | 387968        |
| value_loss         | 0.0032300998  |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 2.5519748e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | 0.00439       |
| fps                | 9             |
| n_updates          | 3032          |
| policy_entropy     | 1.9998031     |
| policy_loss        | 0.00013567228 |
| serial_times

Round done
--------------------------------------
| approxkl           | 4.6546224e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.99e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | -0.000369     |
| fps                | 11            |
| n_updates          | 3045          |
| policy_entropy     | 2.00103       |
| policy_loss        | 3.7036836e-05 |
| serial_timesteps   | 389760        |
| time_elapsed       | 2.4e+04       |
| total_timesteps    | 389760        |
| value_loss         | 0.003369862   |
--------------------------------------
---------------------------------------
| approxkl           | 3.827232e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.99e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | -0.0016        |
| fps                | 24             |
| n_updates          | 3046           |
| policy_entropy     | 2.0053723      |
| policy_loss        | -0.00022495538 |
| se

--------------------------------------
| approxkl           | 1.1652426e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | -0.00341      |
| fps                | 24            |
| n_updates          | 3059          |
| policy_entropy     | 1.9491587     |
| policy_loss        | -0.0004961379 |
| serial_timesteps   | 391552        |
| time_elapsed       | 2.41e+04      |
| total_timesteps    | 391552        |
| value_loss         | 0.011570255   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.0951058e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | -0.0058       |
| fps                | 10            |
| n_updates          | 3060          |
| policy_entropy     | 1.9559088     |
| policy_loss        | 5.537644e-05  |
| serial_times

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 5.4093438e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | 0.204         |
| fps                | 8             |
| n_updates          | 3073          |
| policy_entropy     | 1.8625791     |
| policy_loss        | -0.001983082  |
| serial_timesteps   | 393344        |
| time_elapsed       | 2.42e+04      |
| total_timesteps    | 393344        |
| value_loss         | 0.0019792942  |
--------------------------------------
--------------------------------------
| approxkl           | 2.5278572e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.01e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | 0.0137        |
| fps                | 24            |
| n_updates          | 3074          |
| policy_entropy     | 1.93725

Round done
---------------------------------------
| approxkl           | 2.1627557e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.01e+03       |
| ep_reward_mean     | 4.05           |
| explained_variance | -0.000314      |
| fps                | 12             |
| n_updates          | 3087           |
| policy_entropy     | 2.034435       |
| policy_loss        | -0.00030103326 |
| serial_timesteps   | 395136         |
| time_elapsed       | 2.43e+04       |
| total_timesteps    | 395136         |
| value_loss         | 0.008108681    |
---------------------------------------
---------------------------------------
| approxkl           | 1.1828349e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.01e+03       |
| ep_reward_mean     | 4.05           |
| explained_variance | 0.0016         |
| fps                | 24             |
| n_updates          | 3088           |
| policy_entropy     | 2.0422535      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.3150878e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.99e+03      |
| ep_reward_mean     | 4.07          |
| explained_variance | -0.00586      |
| fps                | 24            |
| n_updates          | 3101          |
| policy_entropy     | 2.0454764     |
| policy_loss        | -0.0010143481 |
| serial_timesteps   | 396928        |
| time_elapsed       | 2.44e+04      |
| total_timesteps    | 396928        |
| value_loss         | 0.006512051   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 1.7540553e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.99e+03       |
| ep_reward_mean     | 4.07           |
| explained_variance | 0.00223        |
| fps                | 12             |
| n_updates          | 3102           |
| policy_entropy     | 2.0532455      |
| policy_loss        | -0.00012856722 |
| se

--------------------------------------
| approxkl           | 1.0725855e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.99e+03      |
| ep_reward_mean     | 4.07          |
| explained_variance | -0.00656      |
| fps                | 23            |
| n_updates          | 3115          |
| policy_entropy     | 2.0877175     |
| policy_loss        | -0.0006785989 |
| serial_timesteps   | 398720        |
| time_elapsed       | 2.45e+04      |
| total_timesteps    | 398720        |
| value_loss         | 0.0035653824  |
--------------------------------------
--------------------------------------
| approxkl           | 1.7597047e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.99e+03      |
| ep_reward_mean     | 4.07          |
| explained_variance | -0.00134      |
| fps                | 24            |
| n_updates          | 3116          |
| policy_entropy     | 2.093727      |
| policy_loss        | -0.0007510409 |
| serial_timesteps   | 39

---------------------------------------
| approxkl           | 1.4656743e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3e+03          |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.00293        |
| fps                | 23             |
| n_updates          | 3129           |
| policy_entropy     | 2.1284947      |
| policy_loss        | -0.00069681555 |
| serial_timesteps   | 400512         |
| time_elapsed       | 2.47e+04       |
| total_timesteps    | 400512         |
| value_loss         | 0.005124991    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.6912047e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3e+03         |
| ep_reward_mean     | 4.08          |
| explained_variance | -0.00721      |
| fps                | 9             |
| n_updates          | 3130          |
| policy_entropy     | 2.1414223     |
| policy_loss        | -0.0005958006 |

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 2.3794251e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | 0.555         |
| fps                | 8             |
| n_updates          | 3143          |
| policy_entropy     | 2.1050384     |
| policy_loss        | -0.0004785061 |
| serial_timesteps   | 402304        |
| time_elapsed       | 2.48e+04      |
| total_timesteps    | 402304        |
| value_loss         | 0.0031721885  |
--------------------------------------
-------------------------------------
| approxkl           | 5.919187e-05 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.02e+03     |
| ep_reward_mean     | 4.14         |
| explained_variance | -0.00931     |
| fps                | 23           |
| n_updates          | 3144         |
| policy_entropy     | 2.0685525    |


Round done
---------------------------------------
| approxkl           | 1.1053138e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | -0.000976      |
| fps                | 10             |
| n_updates          | 3157           |
| policy_entropy     | 2.1482909      |
| policy_loss        | -0.00019029155 |
| serial_timesteps   | 404096         |
| time_elapsed       | 2.49e+04       |
| total_timesteps    | 404096         |
| value_loss         | 0.0039215945   |
---------------------------------------
--------------------------------------
| approxkl           | 3.4108573e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | 0.000412      |
| fps                | 24            |
| n_updates          | 3158          |
| policy_entropy     | 2.1600738     |
| policy_loss        | 7.078517e-05  |

Round done
---------------------------------------
| approxkl           | 4.3333293e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.00195       |
| fps                | 11             |
| n_updates          | 3171           |
| policy_entropy     | 2.1672513      |
| policy_loss        | -0.00045613572 |
| serial_timesteps   | 405888         |
| time_elapsed       | 2.5e+04        |
| total_timesteps    | 405888         |
| value_loss         | 0.004228135    |
---------------------------------------
--------------------------------------
| approxkl           | 9.502178e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | -0.0024       |
| fps                | 23            |
| n_updates          | 3172          |
| policy_entropy     | 2.1738324     |
| policy_loss        | -0.0005665589 |

--------------------------------------
| approxkl           | 3.2853048e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | 0.0519        |
| fps                | 24            |
| n_updates          | 3185          |
| policy_entropy     | 1.9828839     |
| policy_loss        | -0.0010464825 |
| serial_timesteps   | 407680        |
| time_elapsed       | 2.51e+04      |
| total_timesteps    | 407680        |
| value_loss         | 0.0017885461  |
--------------------------------------
---------------------------------------
| approxkl           | 4.926441e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.0415        |
| fps                | 24             |
| n_updates          | 3186           |
| policy_entropy     | 2.030587       |
| policy_loss        | -0.00047846325 |
| serial_timest

--------------------------------------
| approxkl           | 2.9641225e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | 0.00163       |
| fps                | 24            |
| n_updates          | 3199          |
| policy_entropy     | 2.1702642     |
| policy_loss        | -0.0007778881 |
| serial_timesteps   | 409472        |
| time_elapsed       | 2.52e+04      |
| total_timesteps    | 409472        |
| value_loss         | 0.0064836587  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.1948235e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | -0.000295     |
| fps                | 10            |
| n_updates          | 3200          |
| policy_entropy     | 2.1704588     |
| policy_loss        | 0.00046510622 |
| serial_times

--------------------------------------
| approxkl           | 7.56724e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | 0.00194       |
| fps                | 24            |
| n_updates          | 3213          |
| policy_entropy     | 2.1937933     |
| policy_loss        | -0.0003745146 |
| serial_timesteps   | 411264        |
| time_elapsed       | 2.53e+04      |
| total_timesteps    | 411264        |
| value_loss         | 0.0074412907  |
--------------------------------------
---------------------------------------
| approxkl           | 1.2875442e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.00268       |
| fps                | 23             |
| n_updates          | 3214           |
| policy_entropy     | 2.1897826      |
| policy_loss        | -0.00061733276 |
| serial_timest

---------------------------------------
| approxkl           | 2.5084713e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | -0.000255      |
| fps                | 24             |
| n_updates          | 3227           |
| policy_entropy     | 2.0692296      |
| policy_loss        | -0.00015831273 |
| serial_timesteps   | 413056         |
| time_elapsed       | 2.54e+04       |
| total_timesteps    | 413056         |
| value_loss         | 0.0027604487   |
---------------------------------------
--------------------------------------
| approxkl           | 1.8010578e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | 0.0137        |
| fps                | 24            |
| n_updates          | 3228          |
| policy_entropy     | 2.0753393     |
| policy_loss        | -0.0012698472 |
| serial_t

--------------------------------------
| approxkl           | 7.02314e-05   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.2           |
| explained_variance | -0.0201       |
| fps                | 24            |
| n_updates          | 3241          |
| policy_entropy     | 1.9350682     |
| policy_loss        | -0.0018202253 |
| serial_timesteps   | 414848        |
| time_elapsed       | 2.55e+04      |
| total_timesteps    | 414848        |
| value_loss         | 0.010277453   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 0.000105489176 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | 0.00791        |
| fps                | 11             |
| n_updates          | 3242           |
| policy_entropy     | 1.9674134      |
| policy_loss        | -0.002226375   |
| se

Stage done
---------------------------------------
| approxkl           | 4.0016857e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | 0.000333       |
| fps                | 9              |
| n_updates          | 3255           |
| policy_entropy     | 2.0341501      |
| policy_loss        | -0.00035229325 |
| serial_timesteps   | 416640         |
| time_elapsed       | 2.56e+04       |
| total_timesteps    | 416640         |
| value_loss         | 0.008188011    |
---------------------------------------
---------------------------------------
| approxkl           | 3.6246056e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | -0.00333       |
| fps                | 24             |
| n_updates          | 3256           |
| policy_entropy     | 2.0343466      |
| policy_loss        | -0.000

Round done
--------------------------------------
| approxkl           | 8.3138584e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.2           |
| explained_variance | 0.00199       |
| fps                | 12            |
| n_updates          | 3269          |
| policy_entropy     | 1.9605267     |
| policy_loss        | -0.0010791197 |
| serial_timesteps   | 418432        |
| time_elapsed       | 2.58e+04      |
| total_timesteps    | 418432        |
| value_loss         | 0.0046883808  |
--------------------------------------
---------------------------------------
| approxkl           | 4.8343627e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | -0.00527       |
| fps                | 23             |
| n_updates          | 3270           |
| policy_entropy     | 1.9919248      |
| policy_loss        | -2.3273751e-05 |
| se

---------------------------------------
| approxkl           | 3.3729557e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | -0.00138       |
| fps                | 23             |
| n_updates          | 3283           |
| policy_entropy     | 2.1685233      |
| policy_loss        | -0.00020095333 |
| serial_timesteps   | 420224         |
| time_elapsed       | 2.59e+04       |
| total_timesteps    | 420224         |
| value_loss         | 0.0026234265   |
---------------------------------------
--------------------------------------
| approxkl           | 5.8917085e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.2           |
| explained_variance | 0.000235      |
| fps                | 24            |
| n_updates          | 3284          |
| policy_entropy     | 2.1673384     |
| policy_loss        | -0.0001930315 |
| serial_t

Round done
---------------------------------------
| approxkl           | 1.2624754e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | 0.00108        |
| fps                | 11             |
| n_updates          | 3297           |
| policy_entropy     | 2.1781273      |
| policy_loss        | -0.00046497583 |
| serial_timesteps   | 422016         |
| time_elapsed       | 2.6e+04        |
| total_timesteps    | 422016         |
| value_loss         | 0.007903984    |
---------------------------------------
---------------------------------------
| approxkl           | 1.0527001e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | -0.000504      |
| fps                | 23             |
| n_updates          | 3298           |
| policy_entropy     | 2.1867888      |
| policy_loss        | -4.333

--------------------------------------
| approxkl           | 7.1067884e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | -0.00035      |
| fps                | 24            |
| n_updates          | 3311          |
| policy_entropy     | 2.193591      |
| policy_loss        | -5.256757e-05 |
| serial_timesteps   | 423808        |
| time_elapsed       | 2.61e+04      |
| total_timesteps    | 423808        |
| value_loss         | 0.002415967   |
--------------------------------------
---------------------------------------
| approxkl           | 2.9772418e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | -0.000679      |
| fps                | 24             |
| n_updates          | 3312           |
| policy_entropy     | 2.1911616      |
| policy_loss        | -0.00026303343 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 4.943567e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.00127        |
| fps                | 11             |
| n_updates          | 3325           |
| policy_entropy     | 2.1712737      |
| policy_loss        | -0.00017846003 |
| serial_timesteps   | 425600         |
| time_elapsed       | 2.62e+04       |
| total_timesteps    | 425600         |
| value_loss         | 0.0049555623   |
---------------------------------------
---------------------------------------
| approxkl           | 6.2428367e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00088       |
| fps                | 23             |
| n_updates          | 3326           |
| policy_entropy     | 2.1823783      |
| policy_loss        | -0.000

Round done
---------------------------------------
| approxkl           | 5.576232e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.000661       |
| fps                | 11             |
| n_updates          | 3339           |
| policy_entropy     | 2.1777143      |
| policy_loss        | -0.00026843697 |
| serial_timesteps   | 427392         |
| time_elapsed       | 2.63e+04       |
| total_timesteps    | 427392         |
| value_loss         | 0.0032323538   |
---------------------------------------
---------------------------------------
| approxkl           | 5.3057975e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.000585       |
| fps                | 23             |
| n_updates          | 3340           |
| policy_entropy     | 2.1797147      |
| policy_loss        | 1.3256

--------------------------------------
| approxkl           | 4.600139e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.24          |
| explained_variance | 0.000237      |
| fps                | 23            |
| n_updates          | 3353          |
| policy_entropy     | 2.1347482     |
| policy_loss        | 0.00018966943 |
| serial_timesteps   | 429184        |
| time_elapsed       | 2.64e+04      |
| total_timesteps    | 429184        |
| value_loss         | 0.0024391708  |
--------------------------------------
---------------------------------------
| approxkl           | 1.9783813e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.24           |
| explained_variance | -0.00226       |
| fps                | 24             |
| n_updates          | 3354           |
| policy_entropy     | 2.1466486      |
| policy_loss        | -0.00058752764 |
| serial_timest

--------------------------------------
| approxkl           | 2.7371505e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.24          |
| explained_variance | -0.00101      |
| fps                | 24            |
| n_updates          | 3367          |
| policy_entropy     | 2.171866      |
| policy_loss        | -0.0013594441 |
| serial_timesteps   | 430976        |
| time_elapsed       | 2.65e+04      |
| total_timesteps    | 430976        |
| value_loss         | 0.0052462984  |
--------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 1.8541145e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.0462         |
| fps                | 8              |
| n_updates          | 3368           |
| policy_entropy     |

Round done
---------------------------------------
| approxkl           | 5.6396384e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.00322        |
| fps                | 11             |
| n_updates          | 3381           |
| policy_entropy     | 2.1748602      |
| policy_loss        | -0.00022622198 |
| serial_timesteps   | 432768         |
| time_elapsed       | 2.66e+04       |
| total_timesteps    | 432768         |
| value_loss         | 0.0028442144   |
---------------------------------------
--------------------------------------
| approxkl           | 1.1672958e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.21          |
| explained_variance | -0.000813     |
| fps                | 23            |
| n_updates          | 3382          |
| policy_entropy     | 2.1802678     |
| policy_loss        | -0.0005337857 |

---------------------------------------
| approxkl           | 3.2434056e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.00112        |
| fps                | 24             |
| n_updates          | 3395           |
| policy_entropy     | 2.2510867      |
| policy_loss        | -0.00011310121 |
| serial_timesteps   | 434560         |
| time_elapsed       | 2.67e+04       |
| total_timesteps    | 434560         |
| value_loss         | 0.0037941446   |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 2.0316536e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.0837       |
| fps                | 8             |
| n_updates          | 3396          |
| policy_entrop

Round done
--------------------------------------
| approxkl           | 4.592415e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.00157      |
| fps                | 10            |
| n_updates          | 3409          |
| policy_entropy     | 2.2243915     |
| policy_loss        | 8.8997185e-05 |
| serial_timesteps   | 436352        |
| time_elapsed       | 2.68e+04      |
| total_timesteps    | 436352        |
| value_loss         | 0.005831643   |
--------------------------------------
---------------------------------------
| approxkl           | 5.15954e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | 0.00105        |
| fps                | 24             |
| n_updates          | 3410           |
| policy_entropy     | 2.2300727      |
| policy_loss        | -0.00041319057 |
| se

Round done
--------------------------------------
| approxkl           | 8.45229e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.26          |
| explained_variance | -0.00115      |
| fps                | 11            |
| n_updates          | 3423          |
| policy_entropy     | 2.1983159     |
| policy_loss        | 0.00013991445 |
| serial_timesteps   | 438144        |
| time_elapsed       | 2.7e+04       |
| total_timesteps    | 438144        |
| value_loss         | 0.0021344153  |
--------------------------------------
---------------------------------------
| approxkl           | 3.0464348e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.26           |
| explained_variance | 0.00184        |
| fps                | 23             |
| n_updates          | 3424           |
| policy_entropy     | 2.1979303      |
| policy_loss        | -0.00021917745 |
| se

Round done
---------------------------------------
| approxkl           | 1.2444181e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.26           |
| explained_variance | -1.66e-05      |
| fps                | 9              |
| n_updates          | 3437           |
| policy_entropy     | 2.228323       |
| policy_loss        | -0.00010206178 |
| serial_timesteps   | 439936         |
| time_elapsed       | 2.71e+04       |
| total_timesteps    | 439936         |
| value_loss         | 0.0039462675   |
---------------------------------------
---------------------------------------
| approxkl           | 2.8321883e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.26           |
| explained_variance | -0.00052       |
| fps                | 23             |
| n_updates          | 3438           |
| policy_entropy     | 2.2302673      |
| policy_loss        | -0.000

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 9.173469e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.31           |
| explained_variance | -0.00279       |
| fps                | 8              |
| n_updates          | 3451           |
| policy_entropy     | 2.1784124      |
| policy_loss        | -0.00043667108 |
| serial_timesteps   | 441728         |
| time_elapsed       | 2.72e+04       |
| total_timesteps    | 441728         |
| value_loss         | 0.00706008     |
---------------------------------------
--------------------------------------
| approxkl           | 3.8832546e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.31          |
| explained_variance | -0.0274       |
| fps                | 23            |
| n_updates          | 3452          |
| policy_entrop

---------------------------------------
| approxkl           | 3.7145746e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.31           |
| explained_variance | -0.00148       |
| fps                | 23             |
| n_updates          | 3465           |
| policy_entropy     | 2.15558        |
| policy_loss        | -0.00025367364 |
| serial_timesteps   | 443520         |
| time_elapsed       | 2.73e+04       |
| total_timesteps    | 443520         |
| value_loss         | 0.0069718305   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 4.335141e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.31           |
| explained_variance | -0.000695      |
| fps                | 9              |
| n_updates          | 3466           |
| policy_entropy     | 2.161308       |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 1.4080009e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | -0.0535        |
| fps                | 24             |
| n_updates          | 3479           |
| policy_entropy     | 2.0541866      |
| policy_loss        | -0.00062313117 |
| serial_timesteps   | 445312         |
| time_elapsed       | 2.74e+04       |
| total_timesteps    | 445312         |
| value_loss         | 0.0056560324   |
---------------------------------------
--------------------------------------
| approxkl           | 8.686734e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.0141       |
| fps                | 24            |
| n_updates          | 3480          |
| policy_entropy     | 2.0926728     |
| policy_loss        | -0.0002476871 |
| serial_t

---------------------------------------
| approxkl           | 3.0645947e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.04e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | 0.00139        |
| fps                | 23             |
| n_updates          | 3493           |
| policy_entropy     | 2.1499462      |
| policy_loss        | -0.00025461242 |
| serial_timesteps   | 447104         |
| time_elapsed       | 2.75e+04       |
| total_timesteps    | 447104         |
| value_loss         | 0.00860431     |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 1.4433179e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.0165       |
| fps                | 8             |
| n_updates          | 3494          |
| policy_entrop

-------------------------------------
| approxkl           | 5.6532e-06   |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.02e+03     |
| ep_reward_mean     | 4.27         |
| explained_variance | 0.000822     |
| fps                | 23           |
| n_updates          | 3507         |
| policy_entropy     | 2.1396177    |
| policy_loss        | 0.0001273714 |
| serial_timesteps   | 448896       |
| time_elapsed       | 2.76e+04     |
| total_timesteps    | 448896       |
| value_loss         | 0.0059409454 |
-------------------------------------
--------------------------------------
| approxkl           | 3.3868114e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | 0.00144       |
| fps                | 24            |
| n_updates          | 3508          |
| policy_entropy     | 2.1409123     |
| policy_loss        | -0.0001856219 |
| serial_timesteps   | 449024        |
|

--------------------------------------
| approxkl           | 4.8968086e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.000103     |
| fps                | 23            |
| n_updates          | 3521          |
| policy_entropy     | 2.1629868     |
| policy_loss        | -0.0005385354 |
| serial_timesteps   | 450688        |
| time_elapsed       | 2.77e+04      |
| total_timesteps    | 450688        |
| value_loss         | 0.0038818282  |
--------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 4.8888414e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.33           |
| explained_variance | 0.0315         |
| fps                | 8              |
| n_updates          | 3522           |
| policy_entropy     |

--------------------------------------
| approxkl           | 5.347096e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.33          |
| explained_variance | -0.00213      |
| fps                | 22            |
| n_updates          | 3535          |
| policy_entropy     | 2.1559088     |
| policy_loss        | 5.4858625e-05 |
| serial_timesteps   | 452480        |
| time_elapsed       | 2.78e+04      |
| total_timesteps    | 452480        |
| value_loss         | 0.0017703073  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 2.8798888e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.33          |
| explained_variance | -0.001        |
| fps                | 10            |
| n_updates          | 3536          |
| policy_entropy     | 2.157621      |
| policy_loss        | -0.0001967568 |
| serial_times

Round done
---------------------------------------
| approxkl           | 2.39076e-05    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.34           |
| explained_variance | -0.0131        |
| fps                | 12             |
| n_updates          | 3549           |
| policy_entropy     | 2.1096423      |
| policy_loss        | -0.00073676556 |
| serial_timesteps   | 454272         |
| time_elapsed       | 2.79e+04       |
| total_timesteps    | 454272         |
| value_loss         | 0.0033805852   |
---------------------------------------
--------------------------------------
| approxkl           | 1.0464759e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.34          |
| explained_variance | 0.00192       |
| fps                | 23            |
| n_updates          | 3550          |
| policy_entropy     | 2.131865      |
| policy_loss        | 0.00018267147 |

Round done
---------------------------------------
| approxkl           | 6.128162e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | 0.0208         |
| fps                | 11             |
| n_updates          | 3563           |
| policy_entropy     | 2.1082702      |
| policy_loss        | -0.00094982074 |
| serial_timesteps   | 456064         |
| time_elapsed       | 2.81e+04       |
| total_timesteps    | 456064         |
| value_loss         | 0.0029972638   |
---------------------------------------
--------------------------------------
| approxkl           | 2.2144694e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.32          |
| explained_variance | 0.0221        |
| fps                | 23            |
| n_updates          | 3564          |
| policy_entropy     | 2.144162      |
| policy_loss        | -0.0002265498 |

---------------------------------------
| approxkl           | 1.2395234e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | 0.000902       |
| fps                | 23             |
| n_updates          | 3577           |
| policy_entropy     | 2.262885       |
| policy_loss        | -0.00035008788 |
| serial_timesteps   | 457856         |
| time_elapsed       | 2.82e+04       |
| total_timesteps    | 457856         |
| value_loss         | 0.004942838    |
---------------------------------------
--------------------------------------
| approxkl           | 4.4820463e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.32          |
| explained_variance | -0.00138      |
| fps                | 22            |
| n_updates          | 3578          |
| policy_entropy     | 2.2672863     |
| policy_loss        | 0.0001575984  |
| serial_t

---------------------------------------
| approxkl           | 1.9605124e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.28           |
| explained_variance | 0.00128        |
| fps                | 23             |
| n_updates          | 3591           |
| policy_entropy     | 2.228662       |
| policy_loss        | -0.00048268214 |
| serial_timesteps   | 459648         |
| time_elapsed       | 2.83e+04       |
| total_timesteps    | 459648         |
| value_loss         | 0.0027697312   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.2115113e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.28           |
| explained_variance | -0.00102       |
| fps                | 9              |
| n_updates          | 3592           |
| policy_entropy     | 2.2439828      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 4.111152e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.28          |
| explained_variance | -0.00228      |
| fps                | 23            |
| n_updates          | 3605          |
| policy_entropy     | 2.256766      |
| policy_loss        | -0.0008208798 |
| serial_timesteps   | 461440        |
| time_elapsed       | 2.84e+04      |
| total_timesteps    | 461440        |
| value_loss         | 0.007964698   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.2609384e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.28          |
| explained_variance | 0.00207       |
| fps                | 10            |
| n_updates          | 3606          |
| policy_entropy     | 2.253274      |
| policy_loss        | -0.0005803928 |
| serial_times

--------------------------------------
| approxkl           | 7.225551e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.28          |
| explained_variance | 0.000545      |
| fps                | 22            |
| n_updates          | 3619          |
| policy_entropy     | 2.214242      |
| policy_loss        | -0.0003825873 |
| serial_timesteps   | 463232        |
| time_elapsed       | 2.85e+04      |
| total_timesteps    | 463232        |
| value_loss         | 0.0023158079  |
--------------------------------------
--------------------------------------
| approxkl           | 5.883108e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.28          |
| explained_variance | -0.000746     |
| fps                | 22            |
| n_updates          | 3620          |
| policy_entropy     | 2.210189      |
| policy_loss        | -0.0001067929 |
| serial_timesteps   | 46

---------------------------------------
| approxkl           | 3.5777216e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.33           |
| explained_variance | 0.000852       |
| fps                | 23             |
| n_updates          | 3633           |
| policy_entropy     | 2.1617446      |
| policy_loss        | -0.00030406378 |
| serial_timesteps   | 465024         |
| time_elapsed       | 2.86e+04       |
| total_timesteps    | 465024         |
| value_loss         | 0.0048347153   |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 2.3415646e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.33          |
| explained_variance | 0.00343       |
| fps                | 9             |
| n_updates          | 3634          |
| policy_entropy     | 2.1660373     |
| policy_loss        | 9.818748e-05  |

--------------------------------------
| approxkl           | 6.248817e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.33          |
| explained_variance | 0.00018       |
| fps                | 23            |
| n_updates          | 3647          |
| policy_entropy     | 2.1998653     |
| policy_loss        | -0.0002825819 |
| serial_timesteps   | 466816        |
| time_elapsed       | 2.87e+04      |
| total_timesteps    | 466816        |
| value_loss         | 0.006220514   |
--------------------------------------
--------------------------------------
| approxkl           | 3.1127747e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.33          |
| explained_variance | 0.000533      |
| fps                | 22            |
| n_updates          | 3648          |
| policy_entropy     | 2.197277      |
| policy_loss        | 0.00014514662 |
| serial_timesteps   | 46

---------------------------------------
| approxkl           | 1.7539049e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | 0.00181        |
| fps                | 23             |
| n_updates          | 3661           |
| policy_entropy     | 2.2120743      |
| policy_loss        | -0.00083901174 |
| serial_timesteps   | 468608         |
| time_elapsed       | 2.89e+04       |
| total_timesteps    | 468608         |
| value_loss         | 0.0022498278   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.03624925e-05 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | 0.00472        |
| fps                | 10             |
| n_updates          | 3662           |
| policy_entropy     | 2.221779       |
| policy_loss        | 0.0002

--------------------------------------
| approxkl           | 2.9964413e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.32          |
| explained_variance | 0.00301       |
| fps                | 23            |
| n_updates          | 3675          |
| policy_entropy     | 2.2411249     |
| policy_loss        | -0.0003059078 |
| serial_timesteps   | 470400        |
| time_elapsed       | 2.9e+04       |
| total_timesteps    | 470400        |
| value_loss         | 0.005397832   |
--------------------------------------
---------------------------------------
| approxkl           | 5.4842603e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | -0.00241       |
| fps                | 23             |
| n_updates          | 3676           |
| policy_entropy     | 2.2459278      |
| policy_loss        | -0.00018989295 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 2.4563127e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.34           |
| explained_variance | 0.000426       |
| fps                | 10             |
| n_updates          | 3689           |
| policy_entropy     | 2.2246687      |
| policy_loss        | -0.00086362846 |
| serial_timesteps   | 472192         |
| time_elapsed       | 2.91e+04       |
| total_timesteps    | 472192         |
| value_loss         | 0.0040290914   |
---------------------------------------
--------------------------------------
| approxkl           | 1.3305253e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.34          |
| explained_variance | -0.000416     |
| fps                | 23            |
| n_updates          | 3690          |
| policy_entropy     | 2.2502468     |
| policy_loss        | 0.00019238144 |

--------------------------------------
| approxkl           | 2.3524785e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.34          |
| explained_variance | 6.66e-05      |
| fps                | 23            |
| n_updates          | 3703          |
| policy_entropy     | 2.2880054     |
| policy_loss        | 0.00011594873 |
| serial_timesteps   | 473984        |
| time_elapsed       | 2.92e+04      |
| total_timesteps    | 473984        |
| value_loss         | 0.007918644   |
--------------------------------------
---------------------------------------
| approxkl           | 1.4523018e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.34           |
| explained_variance | -6.83e-05      |
| fps                | 23             |
| n_updates          | 3704           |
| policy_entropy     | 2.2896945      |
| policy_loss        | -0.00014498364 |
| serial_timest

--------------------------------------
| approxkl           | 1.3697526e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.32          |
| explained_variance | 0.00178       |
| fps                | 23            |
| n_updates          | 3717          |
| policy_entropy     | 2.2671304     |
| policy_loss        | 8.956343e-05  |
| serial_timesteps   | 475776        |
| time_elapsed       | 2.93e+04      |
| total_timesteps    | 475776        |
| value_loss         | 0.0027481238  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 5.9947415e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | 0.0114         |
| fps                | 10             |
| n_updates          | 3718           |
| policy_entropy     | 2.287665       |
| policy_loss        | -0.00042237062 |
| se

Stage done
---------------------------------------
| approxkl           | 4.8997213e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | -0.000644      |
| fps                | 10             |
| n_updates          | 3731           |
| policy_entropy     | 2.3060186      |
| policy_loss        | -0.00016203709 |
| serial_timesteps   | 477568         |
| time_elapsed       | 2.94e+04       |
| total_timesteps    | 477568         |
| value_loss         | 0.005596582    |
---------------------------------------
---------------------------------------
| approxkl           | 4.514359e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | 0.00215        |
| fps                | 23             |
| n_updates          | 3732           |
| policy_entropy     | 2.3031383      |
| policy_loss        | -0.000

Stage done
---------------------------------------
| approxkl           | 5.908662e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | 0.00014        |
| fps                | 9              |
| n_updates          | 3745           |
| policy_entropy     | 2.3042483      |
| policy_loss        | -0.00015981123 |
| serial_timesteps   | 479360         |
| time_elapsed       | 2.95e+04       |
| total_timesteps    | 479360         |
| value_loss         | 0.005934294    |
---------------------------------------
---------------------------------------
| approxkl           | 1.4468264e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | -0.00118       |
| fps                | 22             |
| n_updates          | 3746           |
| policy_entropy     | 2.3063478      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 9.973386e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.09e+03       |
| ep_reward_mean     | 4.32           |
| explained_variance | 0.0025         |
| fps                | 23             |
| n_updates          | 3759           |
| policy_entropy     | 2.2829037      |
| policy_loss        | -0.00040081516 |
| serial_timesteps   | 481152         |
| time_elapsed       | 2.96e+04       |
| total_timesteps    | 481152         |
| value_loss         | 0.00267461     |
---------------------------------------
--------------------------------------
| approxkl           | 6.985775e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.32          |
| explained_variance | -0.00126      |
| fps                | 23            |
| n_updates          | 3760          |
| policy_entropy     | 2.2886012     |
| policy_loss        | -8.095708e-05 |
| serial_t

--------------------------------------
| approxkl           | 1.2084248e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.33          |
| explained_variance | -0.00145      |
| fps                | 22            |
| n_updates          | 3773          |
| policy_entropy     | 2.2807531     |
| policy_loss        | -0.0002459907 |
| serial_timesteps   | 482944        |
| time_elapsed       | 2.98e+04      |
| total_timesteps    | 482944        |
| value_loss         | 0.0053077056  |
--------------------------------------
---------------------------------------
| approxkl           | 8.477058e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.09e+03       |
| ep_reward_mean     | 4.33           |
| explained_variance | 0.000267       |
| fps                | 22             |
| n_updates          | 3774           |
| policy_entropy     | 2.288272       |
| policy_loss        | -0.00048397202 |
| serial_timest

---------------------------------------
| approxkl           | 2.13221e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.00254        |
| fps                | 22             |
| n_updates          | 3787           |
| policy_entropy     | 2.316834       |
| policy_loss        | -0.00014675409 |
| serial_timesteps   | 484736         |
| time_elapsed       | 2.99e+04       |
| total_timesteps    | 484736         |
| value_loss         | 0.005232746    |
---------------------------------------
---------------------------------------
| approxkl           | 1.4017052e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.00589        |
| fps                | 23             |
| n_updates          | 3788           |
| policy_entropy     | 2.3182993      |
| policy_loss        | -0.00013489276 |


---------------------------------------
| approxkl           | 2.3744253e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | 0.00457        |
| fps                | 23             |
| n_updates          | 3801           |
| policy_entropy     | 2.3106897      |
| policy_loss        | -0.00040392112 |
| serial_timesteps   | 486528         |
| time_elapsed       | 3e+04          |
| total_timesteps    | 486528         |
| value_loss         | 0.0032172017   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 1.5635148e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | 0.00109       |
| fps                | 10            |
| n_updates          | 3802          |
| policy_entropy     | 2.311225      |
| policy_loss        | -8.42181e-05  |

---------------------------------------
| approxkl           | 3.921643e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | -0.00363       |
| fps                | 23             |
| n_updates          | 3815           |
| policy_entropy     | 2.2521534      |
| policy_loss        | -5.3395983e-05 |
| serial_timesteps   | 488320         |
| time_elapsed       | 3.01e+04       |
| total_timesteps    | 488320         |
| value_loss         | 0.0030888636   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 7.466965e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.00134        |
| fps                | 9              |
| n_updates          | 3816           |
| policy_entropy     | 2.2658608      |
| policy_loss        | -0.000

Stage done
--------------------------------------
| approxkl           | 2.9722578e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | 0.00285       |
| fps                | 9             |
| n_updates          | 3829          |
| policy_entropy     | 2.2876103     |
| policy_loss        | -0.0001069773 |
| serial_timesteps   | 490112        |
| time_elapsed       | 3.02e+04      |
| total_timesteps    | 490112        |
| value_loss         | 0.009478606   |
--------------------------------------
---------------------------------------
| approxkl           | 2.0398616e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.000948       |
| fps                | 23             |
| n_updates          | 3830           |
| policy_entropy     | 2.287641       |
| policy_loss        | -0.00015696324 |
| se

Round done
--------------------------------------
| approxkl           | 2.8056477e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.18          |
| explained_variance | -0.00508      |
| fps                | 10            |
| n_updates          | 3843          |
| policy_entropy     | 2.267912      |
| policy_loss        | -0.0003497377 |
| serial_timesteps   | 491904        |
| time_elapsed       | 3.03e+04      |
| total_timesteps    | 491904        |
| value_loss         | 0.0032880674  |
--------------------------------------
---------------------------------------
| approxkl           | 3.9891615e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.18           |
| explained_variance | 0.00151        |
| fps                | 23             |
| n_updates          | 3844           |
| policy_entropy     | 2.2709289      |
| policy_loss        | -0.00037580356 |
| se

--------------------------------------
| approxkl           | 7.084215e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.02e+03      |
| ep_reward_mean     | 4.18          |
| explained_variance | 0.0031        |
| fps                | 23            |
| n_updates          | 3857          |
| policy_entropy     | 2.3140342     |
| policy_loss        | -0.0001754351 |
| serial_timesteps   | 493696        |
| time_elapsed       | 3.04e+04      |
| total_timesteps    | 493696        |
| value_loss         | 0.0047522443  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 1.3559425e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.02e+03       |
| ep_reward_mean     | 4.18           |
| explained_variance | -0.000786      |
| fps                | 9              |
| n_updates          | 3858           |
| policy_entropy     | 2.3155274      |
| policy_loss        | -0.00022958592 |
| se

--------------------------------------
| approxkl           | 1.0516901e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.00236       |
| fps                | 23            |
| n_updates          | 3871          |
| policy_entropy     | 2.2788877     |
| policy_loss        | -9.700656e-05 |
| serial_timesteps   | 495488        |
| time_elapsed       | 3.06e+04      |
| total_timesteps    | 495488        |
| value_loss         | 0.005384796   |
--------------------------------------
---------------------------------------
| approxkl           | 1.02834e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00282       |
| fps                | 23             |
| n_updates          | 3872           |
| policy_entropy     | 2.2804356      |
| policy_loss        | -5.6144083e-05 |
| serial_timest

--------------------------------------
| approxkl           | 1.2186078e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | -0.000265     |
| fps                | 23            |
| n_updates          | 3885          |
| policy_entropy     | 2.2847328     |
| policy_loss        | -9.169057e-05 |
| serial_timesteps   | 497280        |
| time_elapsed       | 3.07e+04      |
| total_timesteps    | 497280        |
| value_loss         | 0.0047690966  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 8.282426e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00255       |
| fps                | 10             |
| n_updates          | 3886           |
| policy_entropy     | 2.2857978      |
| policy_loss        | -0.00017292239 |
| se

--------------------------------------
| approxkl           | 1.6686299e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.03e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.00334       |
| fps                | 23            |
| n_updates          | 3899          |
| policy_entropy     | 2.2741358     |
| policy_loss        | -0.0002799728 |
| serial_timesteps   | 499072        |
| time_elapsed       | 3.08e+04      |
| total_timesteps    | 499072        |
| value_loss         | 0.0017538735  |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.6283474e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.03e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.00422        |
| fps                | 9              |
| n_updates          | 3900           |
| policy_entropy     | 2.2741618      |
| policy_loss        | -0.00019257888 |
| se

--------------------------------------
| approxkl           | 2.0623233e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | 0.00668       |
| fps                | 22            |
| n_updates          | 3913          |
| policy_entropy     | 2.182651      |
| policy_loss        | -0.0010220744 |
| serial_timesteps   | 500864        |
| time_elapsed       | 3.09e+04      |
| total_timesteps    | 500864        |
| value_loss         | 0.00406224    |
--------------------------------------
---------------------------------------
| approxkl           | 2.9248948e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | -0.00873       |
| fps                | 21             |
| n_updates          | 3914           |
| policy_entropy     | 2.1834135      |
| policy_loss        | -0.00021519884 |
| serial_timest

--------------------------------------
| approxkl           | 1.100591e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.00181      |
| fps                | 22            |
| n_updates          | 3927          |
| policy_entropy     | 2.2502165     |
| policy_loss        | -8.330122e-05 |
| serial_timesteps   | 502656        |
| time_elapsed       | 3.1e+04       |
| total_timesteps    | 502656        |
| value_loss         | 0.006997954   |
--------------------------------------
---------------------------------------
| approxkl           | 2.3234973e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | -0.00137       |
| fps                | 22             |
| n_updates          | 3928           |
| policy_entropy     | 2.2537978      |
| policy_loss        | -0.00020459911 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 1.3609858e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | 0.000307       |
| fps                | 10             |
| n_updates          | 3941           |
| policy_entropy     | 2.2704272      |
| policy_loss        | -0.00014215708 |
| serial_timesteps   | 504448         |
| time_elapsed       | 3.11e+04       |
| total_timesteps    | 504448         |
| value_loss         | 0.005418319    |
---------------------------------------
--------------------------------------
| approxkl           | 3.5254754e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.000486     |
| fps                | 22            |
| n_updates          | 3942          |
| policy_entropy     | 2.2691927     |
| policy_loss        | -0.000393264  |

--------------------------------------
| approxkl           | 8.963532e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.00226      |
| fps                | 22            |
| n_updates          | 3955          |
| policy_entropy     | 2.218359      |
| policy_loss        | 3.6917627e-05 |
| serial_timesteps   | 506240        |
| time_elapsed       | 3.13e+04      |
| total_timesteps    | 506240        |
| value_loss         | 0.0038216827  |
--------------------------------------
--------------------------------------
| approxkl           | 8.4401387e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | 0.00409       |
| fps                | 21            |
| n_updates          | 3956          |
| policy_entropy     | 2.2218156     |
| policy_loss        | -0.0001814384 |
| serial_timesteps   | 50

Stage done
---------------------------------------
| approxkl           | 9.939535e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.29           |
| explained_variance | 0.00479        |
| fps                | 8              |
| n_updates          | 3969           |
| policy_entropy     | 2.2070718      |
| policy_loss        | -0.00012118369 |
| serial_timesteps   | 508032         |
| time_elapsed       | 3.14e+04       |
| total_timesteps    | 508032         |
| value_loss         | 0.0035920944   |
---------------------------------------
--------------------------------------
| approxkl           | 3.48778e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.29          |
| explained_variance | 0.00236       |
| fps                | 21            |
| n_updates          | 3970          |
| policy_entropy     | 2.2097948     |
| policy_loss        | -0.0004202372 |

--------------------------------------
| approxkl           | 6.095485e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.24          |
| explained_variance | 0.0751        |
| fps                | 22            |
| n_updates          | 3983          |
| policy_entropy     | 1.881429      |
| policy_loss        | -0.0018222779 |
| serial_timesteps   | 509824        |
| time_elapsed       | 3.15e+04      |
| total_timesteps    | 509824        |
| value_loss         | 0.0030003749  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00013333526 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.24          |
| explained_variance | 0.00014       |
| fps                | 22            |
| n_updates          | 3984          |
| policy_entropy     | 1.9850947     |
| policy_loss        | -0.0007414967 |
| serial_timesteps   | 50

--------------------------------------
| approxkl           | 6.5605363e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.24          |
| explained_variance | -0.00579      |
| fps                | 22            |
| n_updates          | 3997          |
| policy_entropy     | 2.2082891     |
| policy_loss        | -0.0002828762 |
| serial_timesteps   | 511616        |
| time_elapsed       | 3.16e+04      |
| total_timesteps    | 511616        |
| value_loss         | 0.007433666   |
--------------------------------------
--------------------------------------
| approxkl           | 1.0523248e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.04e+03      |
| ep_reward_mean     | 4.24          |
| explained_variance | -0.00333      |
| fps                | 22            |
| n_updates          | 3998          |
| policy_entropy     | 2.2157474     |
| policy_loss        | -0.0002163446 |
| serial_timesteps   | 51

--------------------------------------
| approxkl           | 1.4948004e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | 0.00486       |
| fps                | 21            |
| n_updates          | 4011          |
| policy_entropy     | 2.192658      |
| policy_loss        | -7.279962e-05 |
| serial_timesteps   | 513408        |
| time_elapsed       | 3.17e+04      |
| total_timesteps    | 513408        |
| value_loss         | 0.0056293644  |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.6349367e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | 0.00116        |
| fps                | 9              |
| n_updates          | 4012           |
| policy_entropy     | 2.193942       |
| policy_loss        | -0.00018706173 |
| se

---------------------------------------
| approxkl           | 9.322137e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.26           |
| explained_variance | 0.000972       |
| fps                | 21             |
| n_updates          | 4025           |
| policy_entropy     | 2.1905951      |
| policy_loss        | -0.00075095706 |
| serial_timesteps   | 515200         |
| time_elapsed       | 3.19e+04       |
| total_timesteps    | 515200         |
| value_loss         | 0.0043994803   |
---------------------------------------
--------------------------------------
| approxkl           | 2.5240788e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.05e+03      |
| ep_reward_mean     | 4.26          |
| explained_variance | -0.00451      |
| fps                | 22            |
| n_updates          | 4026          |
| policy_entropy     | 2.20065       |
| policy_loss        | -0.0013972186 |
| serial_t

-------------------------------------
| approxkl           | 6.313815e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.05e+03     |
| ep_reward_mean     | 4.26         |
| explained_variance | 0.000801     |
| fps                | 22           |
| n_updates          | 4039         |
| policy_entropy     | 2.2402377    |
| policy_loss        | -0.000494726 |
| serial_timesteps   | 516992       |
| time_elapsed       | 3.2e+04      |
| total_timesteps    | 516992       |
| value_loss         | 0.0064967196 |
-------------------------------------
---------------------------------------
| approxkl           | 1.0507902e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.05e+03       |
| ep_reward_mean     | 4.26           |
| explained_variance | 0.000259       |
| fps                | 22             |
| n_updates          | 4040           |
| policy_entropy     | 2.2363133      |
| policy_loss        | -0.00031252392 |
| serial_timesteps   | 517120 

Round done
--------------------------------------
| approxkl           | 2.7955419e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.38          |
| explained_variance | 0.00156       |
| fps                | 10            |
| n_updates          | 4053          |
| policy_entropy     | 2.1881344     |
| policy_loss        | -0.0011114078 |
| serial_timesteps   | 518784        |
| time_elapsed       | 3.21e+04      |
| total_timesteps    | 518784        |
| value_loss         | 0.0034869937  |
--------------------------------------
--------------------------------------
| approxkl           | 8.295272e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.38          |
| explained_variance | 0.00109       |
| fps                | 22            |
| n_updates          | 4054          |
| policy_entropy     | 2.208338      |
| policy_loss        | 0.00057867216 |
| serial_times

---------------------------------------
| approxkl           | 2.4817239e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.38           |
| explained_variance | 0.00456        |
| fps                | 22             |
| n_updates          | 4067           |
| policy_entropy     | 2.2287679      |
| policy_loss        | -0.00030138344 |
| serial_timesteps   | 520576         |
| time_elapsed       | 3.22e+04       |
| total_timesteps    | 520576         |
| value_loss         | 0.007169163    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 2.0861098e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.38          |
| explained_variance | -0.00197      |
| fps                | 9             |
| n_updates          | 4068          |
| policy_entropy     | 2.2284832     |
| policy_loss        | -4.208507e-05 |

---------------------------------------
| approxkl           | 3.0389392e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.38           |
| explained_variance | 0.000715       |
| fps                | 21             |
| n_updates          | 4081           |
| policy_entropy     | 2.2287621      |
| policy_loss        | -0.00033660978 |
| serial_timesteps   | 522368         |
| time_elapsed       | 3.23e+04       |
| total_timesteps    | 522368         |
| value_loss         | 0.005088784    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 2.9105827e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.38           |
| explained_variance | -0.00144       |
| fps                | 10             |
| n_updates          | 4082           |
| policy_entropy     | 2.2279623      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 8.9774505e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.09e+03       |
| ep_reward_mean     | 4.42           |
| explained_variance | -0.00212       |
| fps                | 22             |
| n_updates          | 4095           |
| policy_entropy     | 2.2402382      |
| policy_loss        | -7.8884885e-05 |
| serial_timesteps   | 524160         |
| time_elapsed       | 3.25e+04       |
| total_timesteps    | 524160         |
| value_loss         | 0.004193412    |
---------------------------------------
---------------------------------------
| approxkl           | 2.749705e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.09e+03       |
| ep_reward_mean     | 4.42           |
| explained_variance | 0.00586        |
| fps                | 22             |
| n_updates          | 4096           |
| policy_entropy     | 2.240355       |
| policy_loss        | -0.00031465106 |


Round done
--------------------------------------
| approxkl           | 2.4219444e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.39          |
| explained_variance | -0.0213       |
| fps                | 10            |
| n_updates          | 4109          |
| policy_entropy     | 2.1354585     |
| policy_loss        | -8.75406e-05  |
| serial_timesteps   | 525952        |
| time_elapsed       | 3.26e+04      |
| total_timesteps    | 525952        |
| value_loss         | 0.0047365176  |
--------------------------------------
---------------------------------------
| approxkl           | 6.2692225e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.09e+03       |
| ep_reward_mean     | 4.39           |
| explained_variance | 0.00378        |
| fps                | 21             |
| n_updates          | 4110           |
| policy_entropy     | 2.1718822      |
| policy_loss        | -0.00023172796 |
| se

Round done
--------------------------------------
| approxkl           | 1.3768779e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.39          |
| explained_variance | 0.000749      |
| fps                | 10            |
| n_updates          | 4123          |
| policy_entropy     | 2.1843464     |
| policy_loss        | 6.2352046e-06 |
| serial_timesteps   | 527744        |
| time_elapsed       | 3.27e+04      |
| total_timesteps    | 527744        |
| value_loss         | 0.0048235375  |
--------------------------------------
---------------------------------------
| approxkl           | 2.1087974e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.09e+03       |
| ep_reward_mean     | 4.39           |
| explained_variance | -0.0044        |
| fps                | 22             |
| n_updates          | 4124           |
| policy_entropy     | 2.1869805      |
| policy_loss        | -0.00028150156 |
| se

Stage done
--------------------------------------
| approxkl           | 2.5236106e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.39          |
| explained_variance | 0.00213       |
| fps                | 9             |
| n_updates          | 4137          |
| policy_entropy     | 2.2159305     |
| policy_loss        | -0.0003535077 |
| serial_timesteps   | 529536        |
| time_elapsed       | 3.28e+04      |
| total_timesteps    | 529536        |
| value_loss         | 0.0050333967  |
--------------------------------------
--------------------------------------
| approxkl           | 4.516041e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.39          |
| explained_variance | -2.24e-05     |
| fps                | 22            |
| n_updates          | 4138          |
| policy_entropy     | 2.2208319     |
| policy_loss        | -3.853813e-06 |
| serial_times

Round done
--------------------------------------
| approxkl           | 1.8649253e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.12e+03      |
| ep_reward_mean     | 4.43          |
| explained_variance | -0.00159      |
| fps                | 10            |
| n_updates          | 4151          |
| policy_entropy     | 2.2028494     |
| policy_loss        | 2.6309863e-05 |
| serial_timesteps   | 531328        |
| time_elapsed       | 3.29e+04      |
| total_timesteps    | 531328        |
| value_loss         | 0.0049810847  |
--------------------------------------
---------------------------------------
| approxkl           | 8.141345e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 4.43           |
| explained_variance | 0.0119         |
| fps                | 22             |
| n_updates          | 4152           |
| policy_entropy     | 2.2042956      |
| policy_loss        | -0.00019476563 |
| se

---------------------------------------
| approxkl           | 7.5350613e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 4.43           |
| explained_variance | -0.000708      |
| fps                | 22             |
| n_updates          | 4165           |
| policy_entropy     | 2.2022018      |
| policy_loss        | -0.00043009967 |
| serial_timesteps   | 533120         |
| time_elapsed       | 3.3e+04        |
| total_timesteps    | 533120         |
| value_loss         | 0.0087744      |
---------------------------------------
-------------------------------------
| approxkl           | 4.775028e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.12e+03     |
| ep_reward_mean     | 4.43         |
| explained_variance | -0.000306    |
| fps                | 22           |
| n_updates          | 4166         |
| policy_entropy     | 2.1973364    |
| policy_loss        | 0.0001234524 |
| serial_timesteps  

--------------------------------------
| approxkl           | 1.0423012e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.42          |
| explained_variance | 0.00107       |
| fps                | 22            |
| n_updates          | 4179          |
| policy_entropy     | 2.1741261     |
| policy_loss        | -7.221103e-05 |
| serial_timesteps   | 534912        |
| time_elapsed       | 3.32e+04      |
| total_timesteps    | 534912        |
| value_loss         | 0.0059374403  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 1.31425895e-05 |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.42           |
| explained_variance | -0.00332       |
| fps                | 10             |
| n_updates          | 4180           |
| policy_entropy     | 2.1682615      |
| policy_loss        | -0.0007956084  |
| se

Round done
--------------------------------------
| approxkl           | 4.2015154e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.37          |
| explained_variance | -0.00177      |
| fps                | 10            |
| n_updates          | 4193          |
| policy_entropy     | 2.1600735     |
| policy_loss        | 1.8078834e-05 |
| serial_timesteps   | 536704        |
| time_elapsed       | 3.33e+04      |
| total_timesteps    | 536704        |
| value_loss         | 0.0023465268  |
--------------------------------------
--------------------------------------
| approxkl           | 2.8008087e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.37          |
| explained_variance | 0.00742       |
| fps                | 22            |
| n_updates          | 4194          |
| policy_entropy     | 2.1606143     |
| policy_loss        | -6.183982e-05 |
| serial_times

Round done
---------------------------------------
| approxkl           | 2.1173946e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.09e+03       |
| ep_reward_mean     | 4.37           |
| explained_variance | 0.00163        |
| fps                | 10             |
| n_updates          | 4207           |
| policy_entropy     | 2.1610556      |
| policy_loss        | -0.00035254285 |
| serial_timesteps   | 538496         |
| time_elapsed       | 3.34e+04       |
| total_timesteps    | 538496         |
| value_loss         | 0.0049317395   |
---------------------------------------
--------------------------------------
| approxkl           | 9.046089e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.09e+03      |
| ep_reward_mean     | 4.37          |
| explained_variance | 0.000376      |
| fps                | 23            |
| n_updates          | 4208          |
| policy_entropy     | 2.166646      |
| policy_loss        | -0.0004862845 |

---------------------------------------
| approxkl           | 1.0954691e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | 0.00621        |
| fps                | 22             |
| n_updates          | 4221           |
| policy_entropy     | 2.122405       |
| policy_loss        | -0.00033396855 |
| serial_timesteps   | 540288         |
| time_elapsed       | 3.35e+04       |
| total_timesteps    | 540288         |
| value_loss         | 0.006553241    |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 2.0353255e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.00574      |
| fps                | 10            |
| n_updates          | 4222          |
| policy_entropy     | 2.1580372     |
| policy_loss        | -0.0010718852 |

Round done
---------------------------------------
| approxkl           | 2.648656e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.06e+03       |
| ep_reward_mean     | 4.27           |
| explained_variance | 5.61e-05       |
| fps                | 10             |
| n_updates          | 4235           |
| policy_entropy     | 2.2025769      |
| policy_loss        | -6.0181133e-05 |
| serial_timesteps   | 542080         |
| time_elapsed       | 3.36e+04       |
| total_timesteps    | 542080         |
| value_loss         | 0.0056032995   |
---------------------------------------
--------------------------------------
| approxkl           | 1.3874106e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.06e+03      |
| ep_reward_mean     | 4.27          |
| explained_variance | -0.00207      |
| fps                | 22            |
| n_updates          | 4236          |
| policy_entropy     | 2.2022924     |
| policy_loss        | -8.075964e-05 |

Round done
---------------------------------------
| approxkl           | 1.1395137e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.28           |
| explained_variance | -0.0164        |
| fps                | 10             |
| n_updates          | 4249           |
| policy_entropy     | 2.1061733      |
| policy_loss        | -0.00062615005 |
| serial_timesteps   | 543872         |
| time_elapsed       | 3.37e+04       |
| total_timesteps    | 543872         |
| value_loss         | 0.0032108168   |
---------------------------------------
---------------------------------------
| approxkl           | 1.1327636e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.28           |
| explained_variance | 0.000318       |
| fps                | 22             |
| n_updates          | 4250           |
| policy_entropy     | 2.129564       |
| policy_loss        | -0.000

Round done
---------------------------------------
| approxkl           | 2.6273997e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.07e+03       |
| ep_reward_mean     | 4.28           |
| explained_variance | -0.00218       |
| fps                | 10             |
| n_updates          | 4263           |
| policy_entropy     | 2.1917136      |
| policy_loss        | -0.00035556406 |
| serial_timesteps   | 545664         |
| time_elapsed       | 3.39e+04       |
| total_timesteps    | 545664         |
| value_loss         | 0.004031242    |
---------------------------------------
--------------------------------------
| approxkl           | 8.162164e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.07e+03      |
| ep_reward_mean     | 4.28          |
| explained_variance | -0.0043       |
| fps                | 23            |
| n_updates          | 4264          |
| policy_entropy     | 2.1966417     |
| policy_loss        | -0.0005858019 |

---------------------------------------
| approxkl           | 8.136263e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.24           |
| explained_variance | 0.013          |
| fps                | 22             |
| n_updates          | 4277           |
| policy_entropy     | 2.209796       |
| policy_loss        | -0.00031006336 |
| serial_timesteps   | 547456         |
| time_elapsed       | 3.4e+04        |
| total_timesteps    | 547456         |
| value_loss         | 0.0028402144   |
---------------------------------------
Round done
-------------------------------------
| approxkl           | 3.757893e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.08e+03     |
| ep_reward_mean     | 4.24         |
| explained_variance | -0.0066      |
| fps                | 10           |
| n_updates          | 4278         |
| policy_entropy     | 2.2150843    |
| policy_loss        | 7.111207e-05 |
| serial_

---------------------------------------
| approxkl           | 1.3205491e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.24           |
| explained_variance | -0.00112       |
| fps                | 22             |
| n_updates          | 4291           |
| policy_entropy     | 2.2371557      |
| policy_loss        | -2.1349639e-05 |
| serial_timesteps   | 549248         |
| time_elapsed       | 3.41e+04       |
| total_timesteps    | 549248         |
| value_loss         | 0.0026999153   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.950584e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.24           |
| explained_variance | -0.00055       |
| fps                | 9              |
| n_updates          | 4292           |
| policy_entropy     | 2.2400084      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 2.1947579e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.00308       |
| fps                | 23            |
| n_updates          | 4305          |
| policy_entropy     | 2.235903      |
| policy_loss        | -0.0009813346 |
| serial_timesteps   | 551040        |
| time_elapsed       | 3.42e+04      |
| total_timesteps    | 551040        |
| value_loss         | 0.0039738026  |
--------------------------------------
--------------------------------------
| approxkl           | 2.243139e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | -0.00953      |
| fps                | 23            |
| n_updates          | 4306          |
| policy_entropy     | 2.2432516     |
| policy_loss        | -0.0006772089 |
| serial_timesteps   | 55

---------------------------------------
| approxkl           | 8.416465e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.00291        |
| fps                | 23             |
| n_updates          | 4319           |
| policy_entropy     | 2.2803352      |
| policy_loss        | -5.7972968e-05 |
| serial_timesteps   | 552832         |
| time_elapsed       | 3.43e+04       |
| total_timesteps    | 552832         |
| value_loss         | 0.0038658553   |
---------------------------------------
--------------------------------------
| approxkl           | 9.708338e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.08e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.00276       |
| fps                | 23            |
| n_updates          | 4320          |
| policy_entropy     | 2.2834752     |
| policy_loss        | 1.0300428e-05 |
| serial_t

-------------------------------------
| approxkl           | 4.549288e-07 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.08e+03     |
| ep_reward_mean     | 4.22         |
| explained_variance | -0.00119     |
| fps                | 23           |
| n_updates          | 4333         |
| policy_entropy     | 2.2688076    |
| policy_loss        | -5.45457e-05 |
| serial_timesteps   | 554624       |
| time_elapsed       | 3.44e+04     |
| total_timesteps    | 554624       |
| value_loss         | 0.004034932  |
-------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.2440022e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 3.51e-05       |
| fps                | 9              |
| n_updates          | 4334           |
| policy_entropy     | 2.2678955      |
| policy_loss        | -0.00018720701 |
| serial_timesteps 

---------------------------------------
| approxkl           | 2.9731664e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00177       |
| fps                | 24             |
| n_updates          | 4347           |
| policy_entropy     | 2.296186       |
| policy_loss        | -0.00030324608 |
| serial_timesteps   | 556416         |
| time_elapsed       | 3.45e+04       |
| total_timesteps    | 556416         |
| value_loss         | 0.0040404606   |
---------------------------------------
---------------------------------------
| approxkl           | 1.6888362e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.08e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.00193        |
| fps                | 23             |
| n_updates          | 4348           |
| policy_entropy     | 2.2996182      |
| policy_loss        | -0.00023569353 |


---------------------------------------
| approxkl           | 2.6817047e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | -0.00148       |
| fps                | 23             |
| n_updates          | 4361           |
| policy_entropy     | 2.2644815      |
| policy_loss        | -0.00013472885 |
| serial_timesteps   | 558208         |
| time_elapsed       | 3.46e+04       |
| total_timesteps    | 558208         |
| value_loss         | 0.003094421    |
---------------------------------------
---------------------------------------
| approxkl           | 2.069849e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | -0.000806      |
| fps                | 23             |
| n_updates          | 4362           |
| policy_entropy     | 2.268022       |
| policy_loss        | -0.00014268234 |


---------------------------------------
| approxkl           | 6.587444e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.000761       |
| fps                | 23             |
| n_updates          | 4375           |
| policy_entropy     | 2.2596903      |
| policy_loss        | -0.00026336312 |
| serial_timesteps   | 560000         |
| time_elapsed       | 3.48e+04       |
| total_timesteps    | 560000         |
| value_loss         | 0.0030863974   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 9.2169785e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.00174        |
| fps                | 9              |
| n_updates          | 4376           |
| policy_entropy     | 2.2607372      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 0.00013439846 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | -0.0381       |
| fps                | 23            |
| n_updates          | 4389          |
| policy_entropy     | 2.0924687     |
| policy_loss        | -0.0011617411 |
| serial_timesteps   | 561792        |
| time_elapsed       | 3.49e+04      |
| total_timesteps    | 561792        |
| value_loss         | 0.005621083   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00010063556 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.0184        |
| fps                | 22            |
| n_updates          | 4390          |
| policy_entropy     | 2.16342       |
| policy_loss        | -0.0006583263 |
| serial_timesteps   | 56

---------------------------------------
| approxkl           | 5.927238e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.000704       |
| fps                | 23             |
| n_updates          | 4403           |
| policy_entropy     | 2.2557347      |
| policy_loss        | -0.00012873113 |
| serial_timesteps   | 563584         |
| time_elapsed       | 3.5e+04        |
| total_timesteps    | 563584         |
| value_loss         | 0.0034328296   |
---------------------------------------
--------------------------------------
| approxkl           | 3.88455e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | 6.81e-05      |
| fps                | 23            |
| n_updates          | 4404          |
| policy_entropy     | 2.255646      |
| policy_loss        | -0.0002966132 |
| serial_t

---------------------------------------
| approxkl           | 9.2068585e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 7.15e-07       |
| fps                | 23             |
| n_updates          | 4417           |
| policy_entropy     | 2.2319305      |
| policy_loss        | -0.00022070296 |
| serial_timesteps   | 565376         |
| time_elapsed       | 3.51e+04       |
| total_timesteps    | 565376         |
| value_loss         | 0.0063214325   |
---------------------------------------
-------------------------------------
| approxkl           | 3.410749e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.15e+03     |
| ep_reward_mean     | 4.22         |
| explained_variance | 0.000317     |
| fps                | 23           |
| n_updates          | 4418         |
| policy_entropy     | 2.2343025    |
| policy_loss        | 8.863583e-05 |
| serial_timesteps  

---------------------------------------
| approxkl           | 1.6137435e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00119       |
| fps                | 23             |
| n_updates          | 4431           |
| policy_entropy     | 2.2267537      |
| policy_loss        | -0.00033167936 |
| serial_timesteps   | 567168         |
| time_elapsed       | 3.52e+04       |
| total_timesteps    | 567168         |
| value_loss         | 0.012903715    |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 1.1984251e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.28          |
| explained_variance | -0.197        |
| fps                | 7             |
| n_updates          | 4432          |
| policy_entrop

---------------------------------------
| approxkl           | 8.9487025e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.000461       |
| fps                | 23             |
| n_updates          | 4445           |
| policy_entropy     | 1.9551024      |
| policy_loss        | -2.3528934e-05 |
| serial_timesteps   | 568960         |
| time_elapsed       | 3.53e+04       |
| total_timesteps    | 568960         |
| value_loss         | 0.0034152605   |
---------------------------------------
---------------------------------------
| approxkl           | 4.386053e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00149       |
| fps                | 23             |
| n_updates          | 4446           |
| policy_entropy     | 1.9697988      |
| policy_loss        | -0.00022953376 |


--------------------------------------
| approxkl           | 0.00010885761 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.15          |
| explained_variance | -0.00259      |
| fps                | 22            |
| n_updates          | 4459          |
| policy_entropy     | 2.041151      |
| policy_loss        | -0.0002213344 |
| serial_timesteps   | 570752        |
| time_elapsed       | 3.54e+04      |
| total_timesteps    | 570752        |
| value_loss         | 0.0041514514  |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 2.0345371e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.15          |
| explained_variance | -0.00167      |
| fps                | 9             |
| n_updates          | 4460          |
| policy_entropy     | 2.0706859     |
| policy_loss        | 0.0003983155  |
| serial_times

--------------------------------------
| approxkl           | 5.1792804e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.15          |
| explained_variance | -0.00121      |
| fps                | 23            |
| n_updates          | 4473          |
| policy_entropy     | 2.2060192     |
| policy_loss        | -0.0003557112 |
| serial_timesteps   | 572544        |
| time_elapsed       | 3.56e+04      |
| total_timesteps    | 572544        |
| value_loss         | 0.0019656888  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.0218949e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.15          |
| explained_variance | -0.00305      |
| fps                | 10            |
| n_updates          | 4474          |
| policy_entropy     | 2.2114234     |
| policy_loss        | -0.0006025452 |
| serial_times

---------------------------------------
| approxkl           | 1.2764148e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.00317       |
| fps                | 23             |
| n_updates          | 4487           |
| policy_entropy     | 2.2214139      |
| policy_loss        | -0.00011400692 |
| serial_timesteps   | 574336         |
| time_elapsed       | 3.57e+04       |
| total_timesteps    | 574336         |
| value_loss         | 0.012000482    |
---------------------------------------
---------------------------------------
| approxkl           | 8.564037e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.000463      |
| fps                | 23             |
| n_updates          | 4488           |
| policy_entropy     | 2.2280264      |
| policy_loss        | -0.00037954748 |


---------------------------------------
| approxkl           | 2.0954008e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.000527      |
| fps                | 23             |
| n_updates          | 4501           |
| policy_entropy     | 2.2232614      |
| policy_loss        | -1.2613833e-05 |
| serial_timesteps   | 576128         |
| time_elapsed       | 3.58e+04       |
| total_timesteps    | 576128         |
| value_loss         | 0.0046422305   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.2787793e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.00362       |
| fps                | 10             |
| n_updates          | 4502           |
| policy_entropy     | 2.2240365      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.2727721e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | -0.0434       |
| fps                | 24            |
| n_updates          | 4515          |
| policy_entropy     | 1.8735602     |
| policy_loss        | -0.0011416916 |
| serial_timesteps   | 577920        |
| time_elapsed       | 3.59e+04      |
| total_timesteps    | 577920        |
| value_loss         | 0.002829025   |
--------------------------------------
-------------------------------------
| approxkl           | 9.398566e-05 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.19e+03     |
| ep_reward_mean     | 4.12         |
| explained_variance | -0.0521      |
| fps                | 23           |
| n_updates          | 4516         |
| policy_entropy     | 2.0636787    |
| policy_loss        | -0.002635967 |
| serial_timesteps   | 578048      

---------------------------------------
| approxkl           | 1.1917463e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.00238       |
| fps                | 23             |
| n_updates          | 4529           |
| policy_entropy     | 2.2043555      |
| policy_loss        | -0.00050855987 |
| serial_timesteps   | 579712         |
| time_elapsed       | 3.6e+04        |
| total_timesteps    | 579712         |
| value_loss         | 0.002941386    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.020825e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | 0.00838        |
| fps                | 11             |
| n_updates          | 4530           |
| policy_entropy     | 2.1990352      |
| policy_loss        | -0.000

Round done
---------------------------------------
| approxkl           | 5.0644358e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.0124         |
| fps                | 9              |
| n_updates          | 4543           |
| policy_entropy     | 2.1455083      |
| policy_loss        | -0.00015568687 |
| serial_timesteps   | 581504         |
| time_elapsed       | 3.61e+04       |
| total_timesteps    | 581504         |
| value_loss         | 0.0043784096   |
---------------------------------------
---------------------------------------
| approxkl           | 3.171221e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.0154         |
| fps                | 22             |
| n_updates          | 4544           |
| policy_entropy     | 2.1519985      |
| policy_loss        | -2.903

---------------------------------------
| approxkl           | 1.2072131e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.00531        |
| fps                | 22             |
| n_updates          | 4557           |
| policy_entropy     | 2.1803794      |
| policy_loss        | -0.00023357198 |
| serial_timesteps   | 583296         |
| time_elapsed       | 3.62e+04       |
| total_timesteps    | 583296         |
| value_loss         | 0.0033840036   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 2.3733555e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.00213        |
| fps                | 10             |
| n_updates          | 4558           |
| policy_entropy     | 2.1826386      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 7.099415e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.14          |
| explained_variance | 0.0128        |
| fps                | 23            |
| n_updates          | 4571          |
| policy_entropy     | 2.1834192     |
| policy_loss        | -0.0003756322 |
| serial_timesteps   | 585088        |
| time_elapsed       | 3.64e+04      |
| total_timesteps    | 585088        |
| value_loss         | 0.0034377938  |
--------------------------------------
---------------------------------------
| approxkl           | 7.700452e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.14           |
| explained_variance | -0.00355       |
| fps                | 23             |
| n_updates          | 4572           |
| policy_entropy     | 2.1909928      |
| policy_loss        | -0.00021721795 |
| serial_timest

---------------------------------------
| approxkl           | 8.921541e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.14           |
| explained_variance | -0.00124       |
| fps                | 23             |
| n_updates          | 4585           |
| policy_entropy     | 2.2004943      |
| policy_loss        | -0.00040268153 |
| serial_timesteps   | 586880         |
| time_elapsed       | 3.65e+04       |
| total_timesteps    | 586880         |
| value_loss         | 0.009363279    |
---------------------------------------
---------------------------------------
| approxkl           | 5.7825464e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.00018        |
| fps                | 23             |
| n_updates          | 4586           |
| policy_entropy     | 2.2052178      |
| policy_loss        | -4.2069703e-05 |


Stage done
---------------------------------------
| approxkl           | 7.6975375e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.11           |
| explained_variance | -0.00115       |
| fps                | 9              |
| n_updates          | 4599           |
| policy_entropy     | 2.1761909      |
| policy_loss        | -0.00035394076 |
| serial_timesteps   | 588672         |
| time_elapsed       | 3.66e+04       |
| total_timesteps    | 588672         |
| value_loss         | 0.008209729    |
---------------------------------------
---------------------------------------
| approxkl           | 5.2683754e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.11           |
| explained_variance | 0.000351       |
| fps                | 23             |
| n_updates          | 4600           |
| policy_entropy     | 2.1850452      |
| policy_loss        | -3.352

Round done
--------------------------------------
| approxkl           | 1.2680537e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.11          |
| explained_variance | -0.000491     |
| fps                | 10            |
| n_updates          | 4613          |
| policy_entropy     | 2.183382      |
| policy_loss        | 0.00011874549 |
| serial_timesteps   | 590464        |
| time_elapsed       | 3.67e+04      |
| total_timesteps    | 590464        |
| value_loss         | 0.0033271946  |
--------------------------------------
--------------------------------------
| approxkl           | 2.8162121e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.11          |
| explained_variance | -0.00112      |
| fps                | 23            |
| n_updates          | 4614          |
| policy_entropy     | 2.1796248     |
| policy_loss        | 0.00015892833 |
| serial_times

---------------------------------------
| approxkl           | 2.820416e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.11           |
| explained_variance | 0.00238        |
| fps                | 22             |
| n_updates          | 4627           |
| policy_entropy     | 2.2285938      |
| policy_loss        | -0.00033762306 |
| serial_timesteps   | 592256         |
| time_elapsed       | 3.68e+04       |
| total_timesteps    | 592256         |
| value_loss         | 0.0045530936   |
---------------------------------------
--------------------------------------
| approxkl           | 8.07871e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.11          |
| explained_variance | 0.00271       |
| fps                | 23            |
| n_updates          | 4628          |
| policy_entropy     | 2.2337747     |
| policy_loss        | -0.0006942004 |
| serial_t

--------------------------------------
| approxkl           | 3.4856086e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.0293       |
| fps                | 23            |
| n_updates          | 4641          |
| policy_entropy     | 2.1517372     |
| policy_loss        | 0.00020149723 |
| serial_timesteps   | 594048        |
| time_elapsed       | 3.69e+04      |
| total_timesteps    | 594048        |
| value_loss         | 0.0033308337  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.0761746e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.000887     |
| fps                | 10            |
| n_updates          | 4642          |
| policy_entropy     | 2.1905515     |
| policy_loss        | -0.0002650097 |
| serial_times

Round done
--------------------------------------
| approxkl           | 1.5084092e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.00126      |
| fps                | 10            |
| n_updates          | 4655          |
| policy_entropy     | 2.2530992     |
| policy_loss        | -6.231852e-05 |
| serial_timesteps   | 595840        |
| time_elapsed       | 3.7e+04       |
| total_timesteps    | 595840        |
| value_loss         | 0.0035153246  |
--------------------------------------
--------------------------------------
| approxkl           | 2.2733152e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.000851      |
| fps                | 23            |
| n_updates          | 4656          |
| policy_entropy     | 2.2547143     |
| policy_loss        | -0.0002601035 |
| serial_times

Stage done
-------------------------------------
| approxkl           | 0.0017667315 |
| clipfrac           | 0.015625     |
| ep_len_mean        | 3.22e+03     |
| ep_reward_mean     | 4.05         |
| explained_variance | -0.0612      |
| fps                | 9            |
| n_updates          | 4669         |
| policy_entropy     | 2.1887126    |
| policy_loss        | -0.012215078 |
| serial_timesteps   | 597632       |
| time_elapsed       | 3.71e+04     |
| total_timesteps    | 597632       |
| value_loss         | 0.005379998  |
-------------------------------------
--------------------------------------
| approxkl           | 0.00048055098 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | -0.149        |
| fps                | 22            |
| n_updates          | 4670          |
| policy_entropy     | 2.1852882     |
| policy_loss        | -0.010308711  |
| serial_timesteps   | 597760

---------------------------------------
| approxkl           | 1.666427e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.05           |
| explained_variance | 0.00248        |
| fps                | 22             |
| n_updates          | 4683           |
| policy_entropy     | 2.2465138      |
| policy_loss        | -4.7177076e-05 |
| serial_timesteps   | 599424         |
| time_elapsed       | 3.73e+04       |
| total_timesteps    | 599424         |
| value_loss         | 0.0038924753   |
---------------------------------------
--------------------------------------
| approxkl           | 2.9232144e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | 0.00179       |
| fps                | 21            |
| n_updates          | 4684          |
| policy_entropy     | 2.244753      |
| policy_loss        | -0.0003559962 |
| serial_t

--------------------------------------
| approxkl           | 8.5615375e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.01          |
| explained_variance | -0.000985     |
| fps                | 22            |
| n_updates          | 4697          |
| policy_entropy     | 2.237629      |
| policy_loss        | -4.474446e-05 |
| serial_timesteps   | 601216        |
| time_elapsed       | 3.74e+04      |
| total_timesteps    | 601216        |
| value_loss         | 0.0048755566  |
--------------------------------------
---------------------------------------
| approxkl           | 7.2147895e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.01           |
| explained_variance | -0.00293       |
| fps                | 22             |
| n_updates          | 4698           |
| policy_entropy     | 2.239859       |
| policy_loss        | -5.2443705e-05 |
| serial_timest

---------------------------------------
| approxkl           | 1.9882555e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | 0.035          |
| fps                | 22             |
| n_updates          | 4711           |
| policy_entropy     | 2.062994       |
| policy_loss        | -0.00075905025 |
| serial_timesteps   | 603008         |
| time_elapsed       | 3.75e+04       |
| total_timesteps    | 603008         |
| value_loss         | 0.0052186004   |
---------------------------------------
---------------------------------------
| approxkl           | 9.801213e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | -0.00318       |
| fps                | 22             |
| n_updates          | 4712           |
| policy_entropy     | 2.1885471      |
| policy_loss        | -0.00033970736 |


--------------------------------------
| approxkl           | 2.402385e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | -0.00219      |
| fps                | 22            |
| n_updates          | 4725          |
| policy_entropy     | 2.2644086     |
| policy_loss        | -0.0001551956 |
| serial_timesteps   | 604800        |
| time_elapsed       | 3.76e+04      |
| total_timesteps    | 604800        |
| value_loss         | 0.005096844   |
--------------------------------------
---------------------------------------
| approxkl           | 3.156978e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | -0.00774       |
| fps                | 22             |
| n_updates          | 4726           |
| policy_entropy     | 2.266492       |
| policy_loss        | -0.00025011227 |
| serial_timest

---------------------------------------
| approxkl           | 1.7797802e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | -0.007         |
| fps                | 23             |
| n_updates          | 4739           |
| policy_entropy     | 2.2545443      |
| policy_loss        | -0.00026436895 |
| serial_timesteps   | 606592         |
| time_elapsed       | 3.77e+04       |
| total_timesteps    | 606592         |
| value_loss         | 0.004867723    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.6820958e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | 0.000947      |
| fps                | 9             |
| n_updates          | 4740          |
| policy_entropy     | 2.257158      |
| policy_loss        | 2.6576221e-05 |

--------------------------------------
| approxkl           | 3.3888998e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | 3.67e-05      |
| fps                | 22            |
| n_updates          | 4753          |
| policy_entropy     | 2.2742612     |
| policy_loss        | -9.894185e-05 |
| serial_timesteps   | 608384        |
| time_elapsed       | 3.79e+04      |
| total_timesteps    | 608384        |
| value_loss         | 0.0022049975  |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 3.2542296e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | -0.00102       |
| fps                | 8              |
| n_updates          | 4754           |
| policy_entropy     | 2.2714372      |
| policy_loss        | -0.00028169155 |
| se

---------------------------------------
| approxkl           | 3.3917172e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 0.0163         |
| fps                | 22             |
| n_updates          | 4767           |
| policy_entropy     | 2.2182953      |
| policy_loss        | -0.00033673272 |
| serial_timesteps   | 610176         |
| time_elapsed       | 3.8e+04        |
| total_timesteps    | 610176         |
| value_loss         | 0.0016903015   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.7459402e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | -0.0115        |
| fps                | 8              |
| n_updates          | 4768           |
| policy_entropy     | 2.225236       |
| policy_loss        | -3.793

---------------------------------------
| approxkl           | 5.502884e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.07           |
| explained_variance | 0.00885        |
| fps                | 22             |
| n_updates          | 4781           |
| policy_entropy     | 2.266101       |
| policy_loss        | -0.00030073524 |
| serial_timesteps   | 611968         |
| time_elapsed       | 3.81e+04       |
| total_timesteps    | 611968         |
| value_loss         | 0.0018838813   |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 8.850117e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.07          |
| explained_variance | -0.0199       |
| fps                | 8             |
| n_updates          | 4782          |
| policy_entropy     | 2.2747393     |
| policy_loss        | -0.0005160775 |

--------------------------------------
| approxkl           | 1.047121e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.07          |
| explained_variance | -0.00056      |
| fps                | 22            |
| n_updates          | 4795          |
| policy_entropy     | 2.3000464     |
| policy_loss        | -4.142523e-06 |
| serial_timesteps   | 613760        |
| time_elapsed       | 3.82e+04      |
| total_timesteps    | 613760        |
| value_loss         | 0.0036074752  |
--------------------------------------
----------------------------------------
| approxkl           | 2.595292e-07    |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.2e+03         |
| ep_reward_mean     | 4.07            |
| explained_variance | 0.000393        |
| fps                | 22              |
| n_updates          | 4796            |
| policy_entropy     | 2.2997093       |
| policy_loss        | -0.000116594136 |
| ser

--------------------------------------
| approxkl           | 7.600274e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.00224       |
| fps                | 22            |
| n_updates          | 4809          |
| policy_entropy     | 2.2636435     |
| policy_loss        | -6.147288e-05 |
| serial_timesteps   | 615552        |
| time_elapsed       | 3.83e+04      |
| total_timesteps    | 615552        |
| value_loss         | 0.0048747472  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 3.27453e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.00526       |
| fps                | 10            |
| n_updates          | 4810          |
| policy_entropy     | 2.2649374     |
| policy_loss        | 3.6414713e-05 |
| serial_times

Round done
-------------------------------------
| approxkl           | 6.703144e-07 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.19e+03     |
| ep_reward_mean     | 4.1          |
| explained_variance | 0.00196      |
| fps                | 10           |
| n_updates          | 4823         |
| policy_entropy     | 2.266446     |
| policy_loss        | 3.232807e-05 |
| serial_timesteps   | 617344       |
| time_elapsed       | 3.84e+04     |
| total_timesteps    | 617344       |
| value_loss         | 0.006969732  |
-------------------------------------
---------------------------------------
| approxkl           | 4.855358e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 0.00147        |
| fps                | 22             |
| n_updates          | 4824           |
| policy_entropy     | 2.2667902      |
| policy_loss        | -0.00013208017 |
| serial_timesteps 

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 1.0622952e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.115         |
| fps                | 7             |
| n_updates          | 4837          |
| policy_entropy     | 2.0361152     |
| policy_loss        | -0.000506714  |
| serial_timesteps   | 619136        |
| time_elapsed       | 3.86e+04      |
| total_timesteps    | 619136        |
| value_loss         | 0.0044132443  |
--------------------------------------
---------------------------------------
| approxkl           | 1.587301e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | -0.0075        |
| fps                | 23             |
| n_updates          | 4838           |
| policy_entropy     |

Round done
---------------------------------------
| approxkl           | 5.327666e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | -0.00284       |
| fps                | 10             |
| n_updates          | 4851           |
| policy_entropy     | 2.2286477      |
| policy_loss        | -6.7260116e-05 |
| serial_timesteps   | 620928         |
| time_elapsed       | 3.87e+04       |
| total_timesteps    | 620928         |
| value_loss         | 0.0026097493   |
---------------------------------------
---------------------------------------
| approxkl           | 5.000596e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 0.00317        |
| fps                | 23             |
| n_updates          | 4852           |
| policy_entropy     | 2.2303157      |
| policy_loss        | -6.948

---------------------------------------
| approxkl           | 5.347225e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 0.0037         |
| fps                | 23             |
| n_updates          | 4865           |
| policy_entropy     | 2.2578807      |
| policy_loss        | -0.00020852685 |
| serial_timesteps   | 622720         |
| time_elapsed       | 3.88e+04       |
| total_timesteps    | 622720         |
| value_loss         | 0.0036843326   |
---------------------------------------
---------------------------------------
| approxkl           | 3.5928153e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 0.00287        |
| fps                | 23             |
| n_updates          | 4866           |
| policy_entropy     | 2.2634804      |
| policy_loss        | -0.00020262029 |


---------------------------------------
| approxkl           | 2.1983956e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | -0.00496       |
| fps                | 22             |
| n_updates          | 4879           |
| policy_entropy     | 2.2371356      |
| policy_loss        | -0.00022606179 |
| serial_timesteps   | 624512         |
| time_elapsed       | 3.89e+04       |
| total_timesteps    | 624512         |
| value_loss         | 0.0037165354   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.1620417e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.00302        |
| fps                | 8              |
| n_updates          | 4880           |
| policy_entropy     | 2.242994       |
| policy_loss        | -0.000

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 3.7561927e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.09          |
| explained_variance | 0.0318        |
| fps                | 7             |
| n_updates          | 4893          |
| policy_entropy     | 2.0232675     |
| policy_loss        | -0.0016928092 |
| serial_timesteps   | 626304        |
| time_elapsed       | 3.9e+04       |
| total_timesteps    | 626304        |
| value_loss         | 0.007368662   |
--------------------------------------
--------------------------------------
| approxkl           | 2.3679599e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.09          |
| explained_variance | -0.00258      |
| fps                | 22            |
| n_updates          | 4894          |
| policy_entropy     | 2.18662

--------------------------------------
| approxkl           | 4.0707323e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.09          |
| explained_variance | -0.00158      |
| fps                | 21            |
| n_updates          | 4907          |
| policy_entropy     | 2.2383394     |
| policy_loss        | 3.3928081e-06 |
| serial_timesteps   | 628096        |
| time_elapsed       | 3.91e+04      |
| total_timesteps    | 628096        |
| value_loss         | 0.008064208   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 1.0453934e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.09           |
| explained_variance | 0.00136        |
| fps                | 10             |
| n_updates          | 4908           |
| policy_entropy     | 2.237224       |
| policy_loss        | -2.3923814e-05 |
| se

---------------------------------------
| approxkl           | 2.579168e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.09           |
| explained_variance | 0.000156       |
| fps                | 23             |
| n_updates          | 4921           |
| policy_entropy     | 2.2434444      |
| policy_loss        | -0.00022118725 |
| serial_timesteps   | 629888         |
| time_elapsed       | 3.92e+04       |
| total_timesteps    | 629888         |
| value_loss         | 0.0036451365   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 6.5232257e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.09          |
| explained_variance | -0.00243      |
| fps                | 10            |
| n_updates          | 4922          |
| policy_entropy     | 2.248992      |
| policy_loss        | -0.0004750602 |

---------------------------------------
| approxkl           | 2.7795422e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.0016         |
| fps                | 23             |
| n_updates          | 4935           |
| policy_entropy     | 2.1961203      |
| policy_loss        | -0.00015443005 |
| serial_timesteps   | 631680         |
| time_elapsed       | 3.94e+04       |
| total_timesteps    | 631680         |
| value_loss         | 0.006208605    |
---------------------------------------
--------------------------------------
| approxkl           | 1.202925e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | -0.00383      |
| fps                | 23            |
| n_updates          | 4936          |
| policy_entropy     | 2.2005467     |
| policy_loss        | -6.439164e-05 |
| serial_t

---------------------------------------
| approxkl           | 3.6210056e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.000882       |
| fps                | 23             |
| n_updates          | 4949           |
| policy_entropy     | 2.2571084      |
| policy_loss        | -0.00029397383 |
| serial_timesteps   | 633472         |
| time_elapsed       | 3.95e+04       |
| total_timesteps    | 633472         |
| value_loss         | 0.008850391    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.5984783e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.0055         |
| fps                | 10             |
| n_updates          | 4950           |
| policy_entropy     | 2.2610586      |
| policy_loss        | 0.0001

Stage done
---------------------------------------
| approxkl           | 1.1864795e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.00262       |
| fps                | 9              |
| n_updates          | 4963           |
| policy_entropy     | 2.1866336      |
| policy_loss        | -0.00012038462 |
| serial_timesteps   | 635264         |
| time_elapsed       | 3.96e+04       |
| total_timesteps    | 635264         |
| value_loss         | 0.0028752778   |
---------------------------------------
---------------------------------------
| approxkl           | 1.933555e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.00778       |
| fps                | 22             |
| n_updates          | 4964           |
| policy_entropy     | 2.1956434      |
| policy_loss        | -0.000

Stage done
---------------------------------------
| approxkl           | 5.056557e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.00165       |
| fps                | 10             |
| n_updates          | 4977           |
| policy_entropy     | 2.264359       |
| policy_loss        | -0.00011794269 |
| serial_timesteps   | 637056         |
| time_elapsed       | 3.97e+04       |
| total_timesteps    | 637056         |
| value_loss         | 0.0042270734   |
---------------------------------------
--------------------------------------
| approxkl           | 2.2459483e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | -0.000245     |
| fps                | 23            |
| n_updates          | 4978          |
| policy_entropy     | 2.2654445     |
| policy_loss        | -0.0003899485 |

---------------------------------------
| approxkl           | 4.37558e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | 0.00172        |
| fps                | 23             |
| n_updates          | 4991           |
| policy_entropy     | 2.2440946      |
| policy_loss        | -0.00050313957 |
| serial_timesteps   | 638848         |
| time_elapsed       | 3.98e+04       |
| total_timesteps    | 638848         |
| value_loss         | 0.0040413835   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 2.6762089e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | -0.00414      |
| fps                | 10            |
| n_updates          | 4992          |
| policy_entropy     | 2.25475       |
| policy_loss        | 1.8894672e-05 |

----------------------------------------
| approxkl           | 7.602206e-07    |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.26e+03        |
| ep_reward_mean     | 4.17            |
| explained_variance | 0.000651        |
| fps                | 22              |
| n_updates          | 5005            |
| policy_entropy     | 2.291681        |
| policy_loss        | -0.000104334205 |
| serial_timesteps   | 640640          |
| time_elapsed       | 3.99e+04        |
| total_timesteps    | 640640          |
| value_loss         | 0.0056780875    |
----------------------------------------
Round done
---------------------------------------
| approxkl           | 1.6217157e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.000577      |
| fps                | 10             |
| n_updates          | 5006           |
| policy_entropy     | 2.292504       |
| policy_loss 

---------------------------------------
| approxkl           | 8.669926e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | 0.0021         |
| fps                | 23             |
| n_updates          | 5019           |
| policy_entropy     | 2.2802534      |
| policy_loss        | -0.00015832484 |
| serial_timesteps   | 642432         |
| time_elapsed       | 4e+04          |
| total_timesteps    | 642432         |
| value_loss         | 0.0021621566   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.7362413e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | 0.000579       |
| fps                | 9              |
| n_updates          | 5020           |
| policy_entropy     | 2.2808604      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 7.8706717e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | -0.000412      |
| fps                | 22             |
| n_updates          | 5033           |
| policy_entropy     | 2.2410011      |
| policy_loss        | -8.0081634e-05 |
| serial_timesteps   | 644224         |
| time_elapsed       | 4.02e+04       |
| total_timesteps    | 644224         |
| value_loss         | 0.008163955    |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 8.622459e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | 0.0019        |
| fps                | 10            |
| n_updates          | 5034          |
| policy_entropy     | 2.240316      |
| policy_loss        | -1.874566e-05 |

Round done
---------------------------------------
| approxkl           | 6.753055e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.16           |
| explained_variance | -0.00158       |
| fps                | 10             |
| n_updates          | 5047           |
| policy_entropy     | 2.242135       |
| policy_loss        | -0.00021229684 |
| serial_timesteps   | 646016         |
| time_elapsed       | 4.03e+04       |
| total_timesteps    | 646016         |
| value_loss         | 0.005160803    |
---------------------------------------
--------------------------------------
| approxkl           | 1.4026999e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.16          |
| explained_variance | -0.000266     |
| fps                | 22            |
| n_updates          | 5048          |
| policy_entropy     | 2.247089      |
| policy_loss        | 0.00016602501 |

---------------------------------------
| approxkl           | 2.7821898e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.16           |
| explained_variance | -3.72e-05      |
| fps                | 23             |
| n_updates          | 5061           |
| policy_entropy     | 2.2385476      |
| policy_loss        | -0.00021339953 |
| serial_timesteps   | 647808         |
| time_elapsed       | 4.04e+04       |
| total_timesteps    | 647808         |
| value_loss         | 0.010696915    |
---------------------------------------
--------------------------------------
| approxkl           | 1.6973859e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.16          |
| explained_variance | -0.000898     |
| fps                | 22            |
| n_updates          | 5062          |
| policy_entropy     | 2.2352335     |
| policy_loss        | 4.4211745e-05 |
| serial_t

---------------------------------------
| approxkl           | 2.773433e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.16           |
| explained_variance | -0.0014        |
| fps                | 22             |
| n_updates          | 5075           |
| policy_entropy     | 2.239381       |
| policy_loss        | -1.4025718e-05 |
| serial_timesteps   | 649600         |
| time_elapsed       | 4.05e+04       |
| total_timesteps    | 649600         |
| value_loss         | 0.0035830217   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 2.60783e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.16          |
| explained_variance | 0.000445      |
| fps                | 10            |
| n_updates          | 5076          |
| policy_entropy     | 2.2408433     |
| policy_loss        | -0.0003910479 |

Stage done
--------------------------------------
| approxkl           | 2.7895685e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | -0.00148      |
| fps                | 8             |
| n_updates          | 5089          |
| policy_entropy     | 2.2442336     |
| policy_loss        | 0.00016085058 |
| serial_timesteps   | 651392        |
| time_elapsed       | 4.06e+04      |
| total_timesteps    | 651392        |
| value_loss         | 0.0057584825  |
--------------------------------------
---------------------------------------
| approxkl           | 1.0078445e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.00273        |
| fps                | 22             |
| n_updates          | 5090           |
| policy_entropy     | 2.24785        |
| policy_loss        | -0.00018132105 |
| se

---------------------------------------
| approxkl           | 7.673272e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.000344       |
| fps                | 22             |
| n_updates          | 5103           |
| policy_entropy     | 2.1790736      |
| policy_loss        | -0.00028724223 |
| serial_timesteps   | 653184         |
| time_elapsed       | 4.07e+04       |
| total_timesteps    | 653184         |
| value_loss         | 0.008037212    |
---------------------------------------
--------------------------------------
| approxkl           | 2.417994e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.00895       |
| fps                | 22            |
| n_updates          | 5104          |
| policy_entropy     | 2.225275      |
| policy_loss        | -1.712516e-05 |
| serial_t

---------------------------------------
| approxkl           | 2.4820856e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.00097       |
| fps                | 22             |
| n_updates          | 5117           |
| policy_entropy     | 2.2908652      |
| policy_loss        | -0.00025077723 |
| serial_timesteps   | 654976         |
| time_elapsed       | 4.09e+04       |
| total_timesteps    | 654976         |
| value_loss         | 0.0011198999   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.2576246e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.000364      |
| fps                | 10             |
| n_updates          | 5118           |
| policy_entropy     | 2.2934844      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 3.138041e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | 0.00762        |
| fps                | 22             |
| n_updates          | 5131           |
| policy_entropy     | 2.2916393      |
| policy_loss        | -0.00024320208 |
| serial_timesteps   | 656768         |
| time_elapsed       | 4.1e+04        |
| total_timesteps    | 656768         |
| value_loss         | 0.0026712613   |
---------------------------------------
--------------------------------------
| approxkl           | 1.9711738e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | -0.00286      |
| fps                | 21            |
| n_updates          | 5132          |
| policy_entropy     | 2.2965965     |
| policy_loss        | -7.595867e-06 |
| serial_t

--------------------------------------
| approxkl           | 2.6010434e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | -7.89e-05     |
| fps                | 24            |
| n_updates          | 5145          |
| policy_entropy     | 2.2930298     |
| policy_loss        | -9.849528e-05 |
| serial_timesteps   | 658560        |
| time_elapsed       | 4.11e+04      |
| total_timesteps    | 658560        |
| value_loss         | 0.0037567716  |
--------------------------------------
--------------------------------------
| approxkl           | 4.153544e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | 0.00782       |
| fps                | 23            |
| n_updates          | 5146          |
| policy_entropy     | 2.2950897     |
| policy_loss        | -0.0003179051 |
| serial_timesteps   | 65

---------------------------------------
| approxkl           | 1.3512779e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | -2.69e-05      |
| fps                | 23             |
| n_updates          | 5159           |
| policy_entropy     | 2.2993717      |
| policy_loss        | -0.00017887354 |
| serial_timesteps   | 660352         |
| time_elapsed       | 4.12e+04       |
| total_timesteps    | 660352         |
| value_loss         | 0.0031956888   |
---------------------------------------
--------------------------------------
| approxkl           | 8.0324935e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | -0.000371     |
| fps                | 23            |
| n_updates          | 5160          |
| policy_entropy     | 2.2992244     |
| policy_loss        | 1.2665987e-07 |
| serial_t

Stage done
---------------------------------------
| approxkl           | 1.5295054e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | 0.00586        |
| fps                | 9              |
| n_updates          | 5173           |
| policy_entropy     | 2.2853684      |
| policy_loss        | -0.00024197623 |
| serial_timesteps   | 662144         |
| time_elapsed       | 4.13e+04       |
| total_timesteps    | 662144         |
| value_loss         | 0.006193678    |
---------------------------------------
--------------------------------------
| approxkl           | 5.6960967e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | -0.00232      |
| fps                | 23            |
| n_updates          | 5174          |
| policy_entropy     | 2.284303      |
| policy_loss        | -0.0004202444 |

--------------------------------------
| approxkl           | 3.4129305e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | -0.0224       |
| fps                | 22            |
| n_updates          | 5187          |
| policy_entropy     | 2.2038817     |
| policy_loss        | -0.0011352226 |
| serial_timesteps   | 663936        |
| time_elapsed       | 4.14e+04      |
| total_timesteps    | 663936        |
| value_loss         | 0.006155024   |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.6879483e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | -0.0111        |
| fps                | 9              |
| n_updates          | 5188           |
| policy_entropy     | 2.2130883      |
| policy_loss        | -0.00018086191 |
| se

--------------------------------------
| approxkl           | 5.344719e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | -0.0019       |
| fps                | 23            |
| n_updates          | 5201          |
| policy_entropy     | 2.2228212     |
| policy_loss        | -6.509945e-05 |
| serial_timesteps   | 665728        |
| time_elapsed       | 4.15e+04      |
| total_timesteps    | 665728        |
| value_loss         | 0.0071904026  |
--------------------------------------
---------------------------------------
| approxkl           | 1.6586903e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.000557       |
| fps                | 23             |
| n_updates          | 5202           |
| policy_entropy     | 2.2250915      |
| policy_loss        | -0.00017693825 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 3.58214e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.25           |
| explained_variance | 0.0551         |
| fps                | 11             |
| n_updates          | 5215           |
| policy_entropy     | 2.1456916      |
| policy_loss        | -0.00017826818 |
| serial_timesteps   | 667520         |
| time_elapsed       | 4.16e+04       |
| total_timesteps    | 667520         |
| value_loss         | 0.0015515918   |
---------------------------------------
---------------------------------------
| approxkl           | 5.2208397e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.25           |
| explained_variance | -0.00246       |
| fps                | 23             |
| n_updates          | 5216           |
| policy_entropy     | 2.1895347      |
| policy_loss        | -0.000

Round done
--------------------------------------
| approxkl           | 5.2570385e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.00126       |
| fps                | 11            |
| n_updates          | 5229          |
| policy_entropy     | 2.2892222     |
| policy_loss        | -6.73309e-05  |
| serial_timesteps   | 669312        |
| time_elapsed       | 4.18e+04      |
| total_timesteps    | 669312        |
| value_loss         | 0.010471314   |
--------------------------------------
---------------------------------------
| approxkl           | 5.225254e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.000387       |
| fps                | 24             |
| n_updates          | 5230           |
| policy_entropy     | 2.291356       |
| policy_loss        | -0.00015156949 |
| se

Stage done
--------------------------------------
| approxkl           | 2.0957787e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.23          |
| explained_variance | 0.00286       |
| fps                | 9             |
| n_updates          | 5243          |
| policy_entropy     | 2.3097935     |
| policy_loss        | 4.5131892e-05 |
| serial_timesteps   | 671104        |
| time_elapsed       | 4.19e+04      |
| total_timesteps    | 671104        |
| value_loss         | 0.0048050955  |
--------------------------------------
--------------------------------------
| approxkl           | 4.7337664e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.23          |
| explained_variance | 0.00274       |
| fps                | 23            |
| n_updates          | 5244          |
| policy_entropy     | 2.311223      |
| policy_loss        | 6.444752e-07  |
| serial_times

---------------------------------------
| approxkl           | 2.3934722e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.00583        |
| fps                | 23             |
| n_updates          | 5257           |
| policy_entropy     | 2.3146582      |
| policy_loss        | -0.00038614497 |
| serial_timesteps   | 672896         |
| time_elapsed       | 4.2e+04        |
| total_timesteps    | 672896         |
| value_loss         | 0.003648964    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 3.7517284e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.00165        |
| fps                | 10             |
| n_updates          | 5258           |
| policy_entropy     | 2.3121288      |
| policy_loss        | -0.000

Round done
--------------------------------------
| approxkl           | 1.8656256e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.24          |
| explained_variance | 0.000137      |
| fps                | 11            |
| n_updates          | 5271          |
| policy_entropy     | 2.3031738     |
| policy_loss        | -6.556511e-07 |
| serial_timesteps   | 674688        |
| time_elapsed       | 4.21e+04      |
| total_timesteps    | 674688        |
| value_loss         | 0.0025080165  |
--------------------------------------
---------------------------------------
| approxkl           | 5.2057464e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.24           |
| explained_variance | -0.0212        |
| fps                | 24             |
| n_updates          | 5272           |
| policy_entropy     | 2.3138342      |
| policy_loss        | -1.4621764e-05 |
| se

--------------------------------------
| approxkl           | 1.7186716e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.00428       |
| fps                | 22            |
| n_updates          | 5285          |
| policy_entropy     | 2.2891903     |
| policy_loss        | -0.0003612209 |
| serial_timesteps   | 676480        |
| time_elapsed       | 4.22e+04      |
| total_timesteps    | 676480        |
| value_loss         | 0.006727171   |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.833334e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.22          |
| explained_variance | 0.00569       |
| fps                | 8             |
| n_updates          | 5286          |
| policy_entropy     | 2.2907286     |
| policy_loss        | -0.0002495572 |
| serial_times

Stage done
---------------------------------------
| approxkl           | 1.5494294e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.000378       |
| fps                | 9              |
| n_updates          | 5299           |
| policy_entropy     | 2.268507       |
| policy_loss        | -0.00018543005 |
| serial_timesteps   | 678272         |
| time_elapsed       | 4.23e+04       |
| total_timesteps    | 678272         |
| value_loss         | 0.00735234     |
---------------------------------------
---------------------------------------
| approxkl           | 9.735002e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.22           |
| explained_variance | 0.00047        |
| fps                | 22             |
| n_updates          | 5300           |
| policy_entropy     | 2.2618694      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 7.2688326e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.21          |
| explained_variance | 0.00361       |
| fps                | 24            |
| n_updates          | 5313          |
| policy_entropy     | 2.187181      |
| policy_loss        | -7.223338e-05 |
| serial_timesteps   | 680064        |
| time_elapsed       | 4.25e+04      |
| total_timesteps    | 680064        |
| value_loss         | 0.0029979546  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 5.892418e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | -0.0302        |
| fps                | 11             |
| n_updates          | 5314           |
| policy_entropy     | 2.2356043      |
| policy_loss        | -0.00014876947 |
| se

---------------------------------------
| approxkl           | 2.380873e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.000733       |
| fps                | 23             |
| n_updates          | 5327           |
| policy_entropy     | 2.2735963      |
| policy_loss        | -0.00016419217 |
| serial_timesteps   | 681856         |
| time_elapsed       | 4.26e+04       |
| total_timesteps    | 681856         |
| value_loss         | 0.004091531    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.6532057e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | -0.000267      |
| fps                | 8              |
| n_updates          | 5328           |
| policy_entropy     | 2.276098       |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 4.0081994e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | 0.000118       |
| fps                | 23             |
| n_updates          | 5341           |
| policy_entropy     | 2.2933083      |
| policy_loss        | -8.5208565e-05 |
| serial_timesteps   | 683648         |
| time_elapsed       | 4.27e+04       |
| total_timesteps    | 683648         |
| value_loss         | 0.00603117     |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 3.8270113e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.21           |
| explained_variance | -8.4e-05       |
| fps                | 9              |
| n_updates          | 5342           |
| policy_entropy     | 2.293508       |
| policy_loss        | -7.647

--------------------------------------
| approxkl           | 2.575392e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.18          |
| explained_variance | -0.0217       |
| fps                | 24            |
| n_updates          | 5355          |
| policy_entropy     | 2.181419      |
| policy_loss        | -0.0013920041 |
| serial_timesteps   | 685440        |
| time_elapsed       | 4.28e+04      |
| total_timesteps    | 685440        |
| value_loss         | 0.0017904827  |
--------------------------------------
---------------------------------------
| approxkl           | 2.5127782e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.18           |
| explained_variance | -0.00522       |
| fps                | 24             |
| n_updates          | 5356           |
| policy_entropy     | 2.2711532      |
| policy_loss        | -0.00037821382 |
| serial_timest

--------------------------------------
| approxkl           | 5.8948723e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.18          |
| explained_variance | -0.000464     |
| fps                | 24            |
| n_updates          | 5369          |
| policy_entropy     | 2.3112996     |
| policy_loss        | -4.55454e-05  |
| serial_timesteps   | 687232        |
| time_elapsed       | 4.29e+04      |
| total_timesteps    | 687232        |
| value_loss         | 0.0059706117  |
--------------------------------------
--------------------------------------
| approxkl           | 9.777385e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.18          |
| explained_variance | 0.000229      |
| fps                | 24            |
| n_updates          | 5370          |
| policy_entropy     | 2.3128502     |
| policy_loss        | -0.0001442954 |
| serial_timesteps   | 68

Round done
---------------------------------------
| approxkl           | 3.3760023e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | 0.00288        |
| fps                | 11             |
| n_updates          | 5383           |
| policy_entropy     | 2.3076253      |
| policy_loss        | -1.3638288e-05 |
| serial_timesteps   | 689024         |
| time_elapsed       | 4.3e+04        |
| total_timesteps    | 689024         |
| value_loss         | 0.0069975248   |
---------------------------------------
---------------------------------------
| approxkl           | 2.384444e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | 0.00186        |
| fps                | 24             |
| n_updates          | 5384           |
| policy_entropy     | 2.3053508      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 5.6652952e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | -0.00122      |
| fps                | 24            |
| n_updates          | 5397          |
| policy_entropy     | 2.2878363     |
| policy_loss        | 7.547438e-05  |
| serial_timesteps   | 690816        |
| time_elapsed       | 4.31e+04      |
| total_timesteps    | 690816        |
| value_loss         | 0.0027508768  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 1.9988868e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.00297       |
| fps                | 11             |
| n_updates          | 5398           |
| policy_entropy     | 2.3043172      |
| policy_loss        | -0.00011267513 |
| se

---------------------------------------
| approxkl           | 1.4878284e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.000604      |
| fps                | 23             |
| n_updates          | 5411           |
| policy_entropy     | 2.3216543      |
| policy_loss        | -0.00013485737 |
| serial_timesteps   | 692608         |
| time_elapsed       | 4.32e+04       |
| total_timesteps    | 692608         |
| value_loss         | 0.005042059    |
---------------------------------------
--------------------------------------
| approxkl           | 4.1676503e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | 0.000241      |
| fps                | 23            |
| n_updates          | 5412          |
| policy_entropy     | 2.3203192     |
| policy_loss        | 8.063018e-05  |
| serial_t

--------------------------------------
| approxkl           | 1.8322153e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.12e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | -0.00782      |
| fps                | 24            |
| n_updates          | 5425          |
| policy_entropy     | 2.306479      |
| policy_loss        | 3.9409846e-05 |
| serial_timesteps   | 694400        |
| time_elapsed       | 4.33e+04      |
| total_timesteps    | 694400        |
| value_loss         | 0.0041567045  |
--------------------------------------
---------------------------------------
| approxkl           | 8.60928e-07    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.00676       |
| fps                | 24             |
| n_updates          | 5426           |
| policy_entropy     | 2.3087687      |
| policy_loss        | -0.00017077662 |
| serial_timest

---------------------------------------
| approxkl           | 8.334003e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.000451      |
| fps                | 23             |
| n_updates          | 5439           |
| policy_entropy     | 2.29776        |
| policy_loss        | -0.00028693723 |
| serial_timesteps   | 696192         |
| time_elapsed       | 4.34e+04       |
| total_timesteps    | 696192         |
| value_loss         | 0.0045501385   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 9.328454e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.12e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | 0.000645      |
| fps                | 11            |
| n_updates          | 5440          |
| policy_entropy     | 2.2984498     |
| policy_loss        | -2.682209e-05 |

Stage done
--------------------------------------
| approxkl           | 5.722483e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.13e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.00159       |
| fps                | 9             |
| n_updates          | 5453          |
| policy_entropy     | 2.3111348     |
| policy_loss        | 3.9318576e-05 |
| serial_timesteps   | 697984        |
| time_elapsed       | 4.35e+04      |
| total_timesteps    | 697984        |
| value_loss         | 0.0029408091  |
--------------------------------------
---------------------------------------
| approxkl           | 5.563786e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 0.000213       |
| fps                | 23             |
| n_updates          | 5454           |
| policy_entropy     | 2.3106685      |
| policy_loss        | -0.00010955706 |
| se

---------------------------------------
| approxkl           | 3.6326855e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | -0.000266      |
| fps                | 23             |
| n_updates          | 5467           |
| policy_entropy     | 2.3006635      |
| policy_loss        | -0.00011710636 |
| serial_timesteps   | 699776         |
| time_elapsed       | 4.37e+04       |
| total_timesteps    | 699776         |
| value_loss         | 0.003473788    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 8.006495e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.13e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | -0.000249      |
| fps                | 11             |
| n_updates          | 5468           |
| policy_entropy     | 2.3015723      |
| policy_loss        | -5.908

--------------------------------------
| approxkl           | 2.2623213e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | -0.00133      |
| fps                | 24            |
| n_updates          | 5481          |
| policy_entropy     | 2.3056645     |
| policy_loss        | 1.9770581e-05 |
| serial_timesteps   | 701568        |
| time_elapsed       | 4.38e+04      |
| total_timesteps    | 701568        |
| value_loss         | 0.003982624   |
--------------------------------------
---------------------------------------
| approxkl           | 3.5156017e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | -0.00262       |
| fps                | 23             |
| n_updates          | 5482           |
| policy_entropy     | 2.306629       |
| policy_loss        | -0.00012455229 |
| serial_timest

--------------------------------------
| approxkl           | 6.208527e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | 0.000233      |
| fps                | 23            |
| n_updates          | 5495          |
| policy_entropy     | 2.3173583     |
| policy_loss        | -0.0001466563 |
| serial_timesteps   | 703360        |
| time_elapsed       | 4.39e+04      |
| total_timesteps    | 703360        |
| value_loss         | 0.004744222   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 9.0056085e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | -0.000772      |
| fps                | 11             |
| n_updates          | 5496           |
| policy_entropy     | 2.3185651      |
| policy_loss        | -2.3303553e-05 |
| se

Round done
--------------------------------------
| approxkl           | 1.2153203e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | 0.000248      |
| fps                | 11            |
| n_updates          | 5509          |
| policy_entropy     | 2.3175416     |
| policy_loss        | 7.100031e-05  |
| serial_timesteps   | 705152        |
| time_elapsed       | 4.4e+04       |
| total_timesteps    | 705152        |
| value_loss         | 0.0042559355  |
--------------------------------------
--------------------------------------
| approxkl           | 3.1394148e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | -1.19e-06     |
| fps                | 23            |
| n_updates          | 5510          |
| policy_entropy     | 2.3171005     |
| policy_loss        | -4.709512e-05 |
| serial_times

Round done
---------------------------------------
| approxkl           | 1.5006369e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.000358      |
| fps                | 11             |
| n_updates          | 5523           |
| policy_entropy     | 2.3055513      |
| policy_loss        | -3.9449893e-05 |
| serial_timesteps   | 706944         |
| time_elapsed       | 4.41e+04       |
| total_timesteps    | 706944         |
| value_loss         | 0.005236592    |
---------------------------------------
---------------------------------------
| approxkl           | 1.3116191e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.00806       |
| fps                | 24             |
| n_updates          | 5524           |
| policy_entropy     | 2.3107555      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 5.9591537e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | -0.000627     |
| fps                | 24            |
| n_updates          | 5537          |
| policy_entropy     | 2.2954168     |
| policy_loss        | -7.368624e-05 |
| serial_timesteps   | 708736        |
| time_elapsed       | 4.42e+04      |
| total_timesteps    | 708736        |
| value_loss         | 0.0077639758  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 4.5792814e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | -0.000764     |
| fps                | 11            |
| n_updates          | 5538          |
| policy_entropy     | 2.2951503     |
| policy_loss        | -9.306148e-05 |
| serial_times

Round done
--------------------------------------
| approxkl           | 3.2231976e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | -0.00301      |
| fps                | 11            |
| n_updates          | 5551          |
| policy_entropy     | 2.2980971     |
| policy_loss        | 8.685514e-05  |
| serial_timesteps   | 710528        |
| time_elapsed       | 4.43e+04      |
| total_timesteps    | 710528        |
| value_loss         | 0.0040009655  |
--------------------------------------
---------------------------------------
| approxkl           | 5.757736e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.00407       |
| fps                | 24             |
| n_updates          | 5552           |
| policy_entropy     | 2.299811       |
| policy_loss        | -1.0095537e-06 |
| se

---------------------------------------
| approxkl           | 2.5395493e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | 0.000115       |
| fps                | 24             |
| n_updates          | 5565           |
| policy_entropy     | 2.2978356      |
| policy_loss        | -0.00027318858 |
| serial_timesteps   | 712320         |
| time_elapsed       | 4.44e+04       |
| total_timesteps    | 712320         |
| value_loss         | 0.0039960532   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 5.2711703e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | 0.000517       |
| fps                | 11             |
| n_updates          | 5566           |
| policy_entropy     | 2.3014195      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 6.40528e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | -0.000132     |
| fps                | 24            |
| n_updates          | 5579          |
| policy_entropy     | 2.3063555     |
| policy_loss        | 0.00012105331 |
| serial_timesteps   | 714112        |
| time_elapsed       | 4.45e+04      |
| total_timesteps    | 714112        |
| value_loss         | 0.0049467003  |
--------------------------------------
---------------------------------------
| approxkl           | 1.5731413e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | 0.000253       |
| fps                | 24             |
| n_updates          | 5580           |
| policy_entropy     | 2.3060257      |
| policy_loss        | -4.3505803e-05 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 1.2786875e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.15           |
| explained_variance | -0.00103       |
| fps                | 11             |
| n_updates          | 5593           |
| policy_entropy     | 2.3148274      |
| policy_loss        | -0.00012141629 |
| serial_timesteps   | 715904         |
| time_elapsed       | 4.46e+04       |
| total_timesteps    | 715904         |
| value_loss         | 0.0026918063   |
---------------------------------------
--------------------------------------
| approxkl           | 3.0746687e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.15          |
| explained_variance | 0.00165       |
| fps                | 23            |
| n_updates          | 5594          |
| policy_entropy     | 2.3173292     |
| policy_loss        | -0.0003341213 |

---------------------------------------
| approxkl           | 1.5426742e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.00162        |
| fps                | 23             |
| n_updates          | 5607           |
| policy_entropy     | 2.3037999      |
| policy_loss        | -0.00011304766 |
| serial_timesteps   | 717696         |
| time_elapsed       | 4.47e+04       |
| total_timesteps    | 717696         |
| value_loss         | 0.0032753404   |
---------------------------------------
--------------------------------------
| approxkl           | 6.8600566e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | 0.000677      |
| fps                | 23            |
| n_updates          | 5608          |
| policy_entropy     | 2.3328223     |
| policy_loss        | -0.0004692739 |
| serial_t

Round done
--------------------------------------
| approxkl           | 1.8767216e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | -0.000703     |
| fps                | 10            |
| n_updates          | 5621          |
| policy_entropy     | 2.345718      |
| policy_loss        | -8.073449e-05 |
| serial_timesteps   | 719488        |
| time_elapsed       | 4.49e+04      |
| total_timesteps    | 719488        |
| value_loss         | 0.0045594154  |
--------------------------------------
---------------------------------------
| approxkl           | 5.4828377e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.000267       |
| fps                | 23             |
| n_updates          | 5622           |
| policy_entropy     | 2.3450618      |
| policy_loss        | -0.00013770536 |
| se

---------------------------------------
| approxkl           | 2.3789719e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.15           |
| explained_variance | 0.00214        |
| fps                | 23             |
| n_updates          | 5635           |
| policy_entropy     | 2.3334246      |
| policy_loss        | -0.00030688196 |
| serial_timesteps   | 721280         |
| time_elapsed       | 4.5e+04        |
| total_timesteps    | 721280         |
| value_loss         | 0.0018607838   |
---------------------------------------
---------------------------------------
| approxkl           | 1.7946968e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.15           |
| explained_variance | -0.00238       |
| fps                | 23             |
| n_updates          | 5636           |
| policy_entropy     | 2.3360152      |
| policy_loss        | -0.00011105649 |


---------------------------------------
| approxkl           | 2.7730007e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.15           |
| explained_variance | 0.00108        |
| fps                | 22             |
| n_updates          | 5649           |
| policy_entropy     | 2.322104       |
| policy_loss        | -0.00019275397 |
| serial_timesteps   | 723072         |
| time_elapsed       | 4.51e+04       |
| total_timesteps    | 723072         |
| value_loss         | 0.0024949394   |
---------------------------------------
---------------------------------------
| approxkl           | 1.8860396e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.15           |
| explained_variance | 0.000628       |
| fps                | 23             |
| n_updates          | 5650           |
| policy_entropy     | 2.3196678      |
| policy_loss        | -5.2414834e-05 |


Stage done
--------------------------------------
| approxkl           | 1.5249818e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | 0.000579      |
| fps                | 9             |
| n_updates          | 5663          |
| policy_entropy     | 2.3153408     |
| policy_loss        | -9.17539e-05  |
| serial_timesteps   | 724864        |
| time_elapsed       | 4.52e+04      |
| total_timesteps    | 724864        |
| value_loss         | 0.0060170195  |
--------------------------------------
---------------------------------------
| approxkl           | 1.2414896e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | 0.00122        |
| fps                | 22             |
| n_updates          | 5664           |
| policy_entropy     | 2.3169806      |
| policy_loss        | -0.00017470494 |
| se

Stage done
--------------------------------------
| approxkl           | 2.5352826e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | -4.32e-05     |
| fps                | 9             |
| n_updates          | 5677          |
| policy_entropy     | 2.3074365     |
| policy_loss        | -9.655394e-05 |
| serial_timesteps   | 726656        |
| time_elapsed       | 4.53e+04      |
| total_timesteps    | 726656        |
| value_loss         | 0.004049775   |
--------------------------------------
---------------------------------------
| approxkl           | 1.2202129e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.00032       |
| fps                | 23             |
| n_updates          | 5678           |
| policy_entropy     | 2.3049624      |
| policy_loss        | -2.5711954e-05 |
| se

Stage done
---------------------------------------
| approxkl           | 1.2459617e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.18           |
| explained_variance | 0.00116        |
| fps                | 9              |
| n_updates          | 5691           |
| policy_entropy     | 2.3259618      |
| policy_loss        | -0.00013680756 |
| serial_timesteps   | 728448         |
| time_elapsed       | 4.54e+04       |
| total_timesteps    | 728448         |
| value_loss         | 0.0048369835   |
---------------------------------------
---------------------------------------
| approxkl           | 2.5707413e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.18           |
| explained_variance | 0.00136        |
| fps                | 23             |
| n_updates          | 5692           |
| policy_entropy     | 2.3280668      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 3.1882385e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.18           |
| explained_variance | 8.16e-05       |
| fps                | 23             |
| n_updates          | 5705           |
| policy_entropy     | 2.3361754      |
| policy_loss        | -2.2117049e-05 |
| serial_timesteps   | 730240         |
| time_elapsed       | 4.55e+04       |
| total_timesteps    | 730240         |
| value_loss         | 0.004801855    |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 1.3442655e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | 0.0289        |
| fps                | 8             |
| n_updates          | 5706          |
| policy_entrop

---------------------------------------
| approxkl           | 1.7229111e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | -0.000372      |
| fps                | 23             |
| n_updates          | 5719           |
| policy_entropy     | 2.32785        |
| policy_loss        | -9.2152506e-05 |
| serial_timesteps   | 732032         |
| time_elapsed       | 4.56e+04       |
| total_timesteps    | 732032         |
| value_loss         | 0.003108681    |
---------------------------------------
---------------------------------------
| approxkl           | 7.788303e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | -0.000772      |
| fps                | 23             |
| n_updates          | 5720           |
| policy_entropy     | 2.3290293      |
| policy_loss        | -5.6300312e-05 |


---------------------------------------
| approxkl           | 1.064157e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | 0.000126       |
| fps                | 23             |
| n_updates          | 5733           |
| policy_entropy     | 2.3426442      |
| policy_loss        | -0.00023486279 |
| serial_timesteps   | 733824         |
| time_elapsed       | 4.57e+04       |
| total_timesteps    | 733824         |
| value_loss         | 0.007920781    |
---------------------------------------
-------------------------------------
| approxkl           | 1.05969e-06  |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.23e+03     |
| ep_reward_mean     | 4.19         |
| explained_variance | 0.000137     |
| fps                | 23           |
| n_updates          | 5734         |
| policy_entropy     | 2.3438225    |
| policy_loss        | -4.26732e-05 |
| serial_timesteps  

---------------------------------------
| approxkl           | 5.145259e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.00228       |
| fps                | 23             |
| n_updates          | 5747           |
| policy_entropy     | 2.3424904      |
| policy_loss        | -0.00016056187 |
| serial_timesteps   | 735616         |
| time_elapsed       | 4.59e+04       |
| total_timesteps    | 735616         |
| value_loss         | 0.0035736137   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 5.247758e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | 0.000564      |
| fps                | 10            |
| n_updates          | 5748          |
| policy_entropy     | 2.3421397     |
| policy_loss        | -6.333552e-05 |

Stage done
---------------------------------------
| approxkl           | 5.5963244e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -8.55e-05      |
| fps                | 9              |
| n_updates          | 5761           |
| policy_entropy     | 2.3391824      |
| policy_loss        | -0.00014618412 |
| serial_timesteps   | 737408         |
| time_elapsed       | 4.6e+04        |
| total_timesteps    | 737408         |
| value_loss         | 0.007902074    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.5364467e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | 2.89e-05       |
| fps                | 9              |
| n_updates          | 5762           |
| policy_entropy     | 2.3383677      |
| policy_loss     

---------------------------------------
| approxkl           | 1.6899419e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.00011       |
| fps                | 23             |
| n_updates          | 5775           |
| policy_entropy     | 2.3123798      |
| policy_loss        | -4.2411964e-05 |
| serial_timesteps   | 739200         |
| time_elapsed       | 4.61e+04       |
| total_timesteps    | 739200         |
| value_loss         | 0.0039022802   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 4.462927e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.000113      |
| fps                | 9              |
| n_updates          | 5776           |
| policy_entropy     | 2.311372       |
| policy_loss        | -2.371

---------------------------------------
| approxkl           | 2.2102345e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | 0.00139        |
| fps                | 23             |
| n_updates          | 5789           |
| policy_entropy     | 2.3187578      |
| policy_loss        | -0.00031857938 |
| serial_timesteps   | 740992         |
| time_elapsed       | 4.62e+04       |
| total_timesteps    | 740992         |
| value_loss         | 0.0029682163   |
---------------------------------------
--------------------------------------
| approxkl           | 8.2201075e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | -0.00148      |
| fps                | 23            |
| n_updates          | 5790          |
| policy_entropy     | 2.3205967     |
| policy_loss        | 9.63714e-05   |
| serial_t

Round done
---------------------------------------
| approxkl           | 6.585188e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | -0.000339      |
| fps                | 10             |
| n_updates          | 5803           |
| policy_entropy     | 2.3118541      |
| policy_loss        | -0.00010131672 |
| serial_timesteps   | 742784         |
| time_elapsed       | 4.63e+04       |
| total_timesteps    | 742784         |
| value_loss         | 0.008090019    |
---------------------------------------
--------------------------------------
| approxkl           | 6.2502943e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | 7.46e-05      |
| fps                | 23            |
| n_updates          | 5804          |
| policy_entropy     | 2.3106375     |
| policy_loss        | -5.588308e-05 |

Stage done
---------------------------------------
| approxkl           | 8.436936e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | 0.00186        |
| fps                | 9              |
| n_updates          | 5817           |
| policy_entropy     | 2.3180695      |
| policy_loss        | -7.6040626e-05 |
| serial_timesteps   | 744576         |
| time_elapsed       | 4.64e+04       |
| total_timesteps    | 744576         |
| value_loss         | 0.003118008    |
---------------------------------------
---------------------------------------
| approxkl           | 6.229478e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | -0.0015        |
| fps                | 24             |
| n_updates          | 5818           |
| policy_entropy     | 2.319859       |
| policy_loss        | -8.968

---------------------------------------
| approxkl           | 2.1201695e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.14           |
| explained_variance | -0.00033       |
| fps                | 24             |
| n_updates          | 5831           |
| policy_entropy     | 2.3397996      |
| policy_loss        | -0.00024145469 |
| serial_timesteps   | 746368         |
| time_elapsed       | 4.65e+04       |
| total_timesteps    | 746368         |
| value_loss         | 0.005075109    |
---------------------------------------
--------------------------------------
| approxkl           | 1.4975026e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.14          |
| explained_variance | 4.12e-05      |
| fps                | 22            |
| n_updates          | 5832          |
| policy_entropy     | 2.3419542     |
| policy_loss        | 2.6205555e-05 |
| serial_t

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 1.6716984e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.2           |
| explained_variance | 0.41          |
| fps                | 8             |
| n_updates          | 5845          |
| policy_entropy     | 2.264482      |
| policy_loss        | -0.0017240383 |
| serial_timesteps   | 748160        |
| time_elapsed       | 4.66e+04      |
| total_timesteps    | 748160        |
| value_loss         | 0.0018438874  |
--------------------------------------
--------------------------------------
| approxkl           | 7.877921e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.2           |
| explained_variance | 0.0407        |
| fps                | 24            |
| n_updates          | 5846          |
| policy_entropy     | 2.30897

Round done
---------------------------------------
| approxkl           | 1.2002022e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | -0.000446      |
| fps                | 11             |
| n_updates          | 5859           |
| policy_entropy     | 2.3426871      |
| policy_loss        | -0.00023549981 |
| serial_timesteps   | 749952         |
| time_elapsed       | 4.68e+04       |
| total_timesteps    | 749952         |
| value_loss         | 0.0027158519   |
---------------------------------------
---------------------------------------
| approxkl           | 1.2354931e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | -3.83e-05      |
| fps                | 24             |
| n_updates          | 5860           |
| policy_entropy     | 2.3427012      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 3.7742437e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.16          |
| explained_variance | -0.0917       |
| fps                | 23            |
| n_updates          | 5873          |
| policy_entropy     | 2.2718596     |
| policy_loss        | -0.0011797659 |
| serial_timesteps   | 751744        |
| time_elapsed       | 4.69e+04      |
| total_timesteps    | 751744        |
| value_loss         | 0.0034931083  |
--------------------------------------
---------------------------------------
| approxkl           | 2.7493505e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.16           |
| explained_variance | -0.0115        |
| fps                | 24             |
| n_updates          | 5874           |
| policy_entropy     | 2.2959318      |
| policy_loss        | -0.00050999224 |
| serial_timest

---------------------------------------
| approxkl           | 1.1877232e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.16           |
| explained_variance | -0.000106      |
| fps                | 23             |
| n_updates          | 5887           |
| policy_entropy     | 2.322552       |
| policy_loss        | -0.00014594011 |
| serial_timesteps   | 753536         |
| time_elapsed       | 4.7e+04        |
| total_timesteps    | 753536         |
| value_loss         | 0.0071637915   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 2.4265514e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.16           |
| explained_variance | 0.00115        |
| fps                | 11             |
| n_updates          | 5888           |
| policy_entropy     | 2.3243744      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.3314302e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | 0.00217       |
| fps                | 23            |
| n_updates          | 5901          |
| policy_entropy     | 2.2951097     |
| policy_loss        | -0.0005864138 |
| serial_timesteps   | 755328        |
| time_elapsed       | 4.71e+04      |
| total_timesteps    | 755328        |
| value_loss         | 0.0065413713  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 1.1704579e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | 0.00504        |
| fps                | 11             |
| n_updates          | 5902           |
| policy_entropy     | 2.3085675      |
| policy_loss        | -0.00012863986 |
| se

Round done
---------------------------------------
| approxkl           | 3.4446948e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.000762      |
| fps                | 10             |
| n_updates          | 5915           |
| policy_entropy     | 2.3030682      |
| policy_loss        | -2.3048371e-05 |
| serial_timesteps   | 757120         |
| time_elapsed       | 4.72e+04       |
| total_timesteps    | 757120         |
| value_loss         | 0.009193974    |
---------------------------------------
--------------------------------------
| approxkl           | 7.9274696e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | -0.000178     |
| fps                | 24            |
| n_updates          | 5916          |
| policy_entropy     | 2.3016906     |
| policy_loss        | 4.7048554e-05 |

--------------------------------------
| approxkl           | 1.6616194e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.12          |
| explained_variance | -0.000118     |
| fps                | 23            |
| n_updates          | 5929          |
| policy_entropy     | 2.2443976     |
| policy_loss        | -0.0005163094 |
| serial_timesteps   | 758912        |
| time_elapsed       | 4.73e+04      |
| total_timesteps    | 758912        |
| value_loss         | 0.0057429643  |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.2456617e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | 0.00173        |
| fps                | 9              |
| n_updates          | 5930           |
| policy_entropy     | 2.2378252      |
| policy_loss        | -0.00023730844 |
| se

---------------------------------------
| approxkl           | 5.6473864e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.000713      |
| fps                | 23             |
| n_updates          | 5943           |
| policy_entropy     | 2.2390146      |
| policy_loss        | -0.00033093058 |
| serial_timesteps   | 760704         |
| time_elapsed       | 4.74e+04       |
| total_timesteps    | 760704         |
| value_loss         | 0.004597378    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.495215e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | 0.00057        |
| fps                | 9              |
| n_updates          | 5944           |
| policy_entropy     | 2.2354867      |
| policy_loss        | -0.000

Round done
--------------------------------------
| approxkl           | 9.224584e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.00495       |
| fps                | 11            |
| n_updates          | 5957          |
| policy_entropy     | 2.201185      |
| policy_loss        | -0.0005222764 |
| serial_timesteps   | 762496        |
| time_elapsed       | 4.75e+04      |
| total_timesteps    | 762496        |
| value_loss         | 0.002006778   |
--------------------------------------
--------------------------------------
| approxkl           | 1.2665379e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.00605      |
| fps                | 23            |
| n_updates          | 5958          |
| policy_entropy     | 2.213563      |
| policy_loss        | -0.0003361851 |
| serial_times

--------------------------------------
| approxkl           | 4.05896e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.00184      |
| fps                | 23            |
| n_updates          | 5971          |
| policy_entropy     | 2.2983909     |
| policy_loss        | -0.0003957525 |
| serial_timesteps   | 764288        |
| time_elapsed       | 4.76e+04      |
| total_timesteps    | 764288        |
| value_loss         | 0.0052043     |
--------------------------------------
---------------------------------------
| approxkl           | 6.6650477e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | -0.00238       |
| fps                | 22             |
| n_updates          | 5972           |
| policy_entropy     | 2.3037348      |
| policy_loss        | -0.00027303398 |
| serial_timest

---------------------------------------
| approxkl           | 7.959782e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | 0.000991       |
| fps                | 23             |
| n_updates          | 5985           |
| policy_entropy     | 2.2339516      |
| policy_loss        | -0.00046459958 |
| serial_timesteps   | 766080         |
| time_elapsed       | 4.78e+04       |
| total_timesteps    | 766080         |
| value_loss         | 0.0011303178   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 9.412015e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 4.12           |
| explained_variance | -0.000406      |
| fps                | 10             |
| n_updates          | 5986           |
| policy_entropy     | 2.2475536      |
| policy_loss        | -5.629

Round done
--------------------------------------
| approxkl           | 6.750893e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.09          |
| explained_variance | 0.0019        |
| fps                | 11            |
| n_updates          | 5999          |
| policy_entropy     | 2.194919      |
| policy_loss        | -0.0001255162 |
| serial_timesteps   | 767872        |
| time_elapsed       | 4.79e+04      |
| total_timesteps    | 767872        |
| value_loss         | 0.0077035176  |
--------------------------------------
---------------------------------------
| approxkl           | 7.533061e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.09           |
| explained_variance | 0.00031        |
| fps                | 23             |
| n_updates          | 6000           |
| policy_entropy     | 2.2106225      |
| policy_loss        | -0.00042205863 |
| se

---------------------------------------
| approxkl           | 2.7576596e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | -0.00432       |
| fps                | 23             |
| n_updates          | 6013           |
| policy_entropy     | 2.271998       |
| policy_loss        | -2.1876767e-06 |
| serial_timesteps   | 769664         |
| time_elapsed       | 4.8e+04        |
| total_timesteps    | 769664         |
| value_loss         | 0.0033939478   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.740666e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | 0.000165       |
| fps                | 9              |
| n_updates          | 6014           |
| policy_entropy     | 2.2784128      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.2728942e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | -0.00193      |
| fps                | 23            |
| n_updates          | 6027          |
| policy_entropy     | 2.294287      |
| policy_loss        | 3.2037497e-06 |
| serial_timesteps   | 771456        |
| time_elapsed       | 4.81e+04      |
| total_timesteps    | 771456        |
| value_loss         | 0.0042631817  |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.5453725e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | -0.000364      |
| fps                | 9              |
| n_updates          | 6028           |
| policy_entropy     | 2.295324       |
| policy_loss        | -0.00015224703 |
| se

Stage done
--------------------------------------
| approxkl           | 8.3429495e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | 0.00422       |
| fps                | 9             |
| n_updates          | 6041          |
| policy_entropy     | 2.2227364     |
| policy_loss        | 0.00010371208 |
| serial_timesteps   | 773248        |
| time_elapsed       | 4.82e+04      |
| total_timesteps    | 773248        |
| value_loss         | 0.0073141847  |
--------------------------------------
--------------------------------------
| approxkl           | 1.4664718e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | 0.0043        |
| fps                | 24            |
| n_updates          | 6042          |
| policy_entropy     | 2.2284188     |
| policy_loss        | 6.747013e-05  |
| serial_times

Stage done
--------------------------------------
| approxkl           | 2.4690162e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | -0.00226      |
| fps                | 9             |
| n_updates          | 6055          |
| policy_entropy     | 2.2664149     |
| policy_loss        | 0.00015632436 |
| serial_timesteps   | 775040        |
| time_elapsed       | 4.83e+04      |
| total_timesteps    | 775040        |
| value_loss         | 0.008904793   |
--------------------------------------
---------------------------------------
| approxkl           | 1.7813891e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | 0.00039        |
| fps                | 24             |
| n_updates          | 6056           |
| policy_entropy     | 2.2684376      |
| policy_loss        | -0.00022221776 |
| se

--------------------------------------
| approxkl           | 5.4342954e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.02          |
| explained_variance | 0.0057        |
| fps                | 24            |
| n_updates          | 6069          |
| policy_entropy     | 2.2474117     |
| policy_loss        | -5.300902e-05 |
| serial_timesteps   | 776832        |
| time_elapsed       | 4.84e+04      |
| total_timesteps    | 776832        |
| value_loss         | 0.0025348172  |
--------------------------------------
---------------------------------------
| approxkl           | 6.5252707e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | 0.00552        |
| fps                | 24             |
| n_updates          | 6070           |
| policy_entropy     | 2.248754       |
| policy_loss        | -0.00037596934 |
| serial_timest

--------------------------------------
| approxkl           | 4.676791e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.02          |
| explained_variance | 0.000701      |
| fps                | 23            |
| n_updates          | 6083          |
| policy_entropy     | 2.2764792     |
| policy_loss        | -8.538738e-05 |
| serial_timesteps   | 778624        |
| time_elapsed       | 4.85e+04      |
| total_timesteps    | 778624        |
| value_loss         | 0.0039148834  |
--------------------------------------
---------------------------------------
| approxkl           | 2.0748416e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | -0.00221       |
| fps                | 23             |
| n_updates          | 6084           |
| policy_entropy     | 2.2773213      |
| policy_loss        | -0.00037991628 |
| serial_timest

Round done
--------------------------------------
| approxkl           | 7.152882e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.07          |
| explained_variance | -0.00366      |
| fps                | 11            |
| n_updates          | 6097          |
| policy_entropy     | 2.2397523     |
| policy_loss        | -6.982684e-05 |
| serial_timesteps   | 780416        |
| time_elapsed       | 4.86e+04      |
| total_timesteps    | 780416        |
| value_loss         | 0.011259721   |
--------------------------------------
---------------------------------------
| approxkl           | 8.498106e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.07           |
| explained_variance | 0.00753        |
| fps                | 24             |
| n_updates          | 6098           |
| policy_entropy     | 2.2397456      |
| policy_loss        | -0.00024993718 |
| se

---------------------------------------
| approxkl           | 1.0244139e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.07           |
| explained_variance | -0.00296       |
| fps                | 24             |
| n_updates          | 6111           |
| policy_entropy     | 2.299593       |
| policy_loss        | -0.00010500848 |
| serial_timesteps   | 782208         |
| time_elapsed       | 4.88e+04       |
| total_timesteps    | 782208         |
| value_loss         | 0.009386068    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 2.9682576e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.07          |
| explained_variance | -0.00439      |
| fps                | 9             |
| n_updates          | 6112          |
| policy_entropy     | 2.301654      |
| policy_loss        | -8.158386e-06 |

Round done
---------------------------------------
| approxkl           | 1.5508133e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.07           |
| explained_variance | 4.83e-06       |
| fps                | 10             |
| n_updates          | 6125           |
| policy_entropy     | 2.3067856      |
| policy_loss        | -0.00021663122 |
| serial_timesteps   | 784000         |
| time_elapsed       | 4.89e+04       |
| total_timesteps    | 784000         |
| value_loss         | 0.0051195333   |
---------------------------------------
--------------------------------------
| approxkl           | 5.8537495e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.07          |
| explained_variance | -0.000231     |
| fps                | 24            |
| n_updates          | 6126          |
| policy_entropy     | 2.3054957     |
| policy_loss        | -8.624047e-06 |

---------------------------------------
| approxkl           | 1.2180705e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | -0.00096       |
| fps                | 23             |
| n_updates          | 6139           |
| policy_entropy     | 2.3088245      |
| policy_loss        | -0.00023444183 |
| serial_timesteps   | 785792         |
| time_elapsed       | 4.9e+04        |
| total_timesteps    | 785792         |
| value_loss         | 0.006250645    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.5588823e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | 0.000299      |
| fps                | 9             |
| n_updates          | 6140          |
| policy_entropy     | 2.3091426     |
| policy_loss        | -9.425543e-05 |

---------------------------------------
| approxkl           | 7.065334e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.000683       |
| fps                | 22             |
| n_updates          | 6153           |
| policy_entropy     | 2.306181       |
| policy_loss        | -0.00013926066 |
| serial_timesteps   | 787584         |
| time_elapsed       | 4.91e+04       |
| total_timesteps    | 787584         |
| value_loss         | 0.0061266758   |
---------------------------------------
--------------------------------------
| approxkl           | 6.773697e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | 6.08e-06      |
| fps                | 23            |
| n_updates          | 6154          |
| policy_entropy     | 2.3059683     |
| policy_loss        | 2.1398067e-05 |
| serial_t

--------------------------------------
| approxkl           | 1.6921149e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.29e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | -0.0045       |
| fps                | 23            |
| n_updates          | 6167          |
| policy_entropy     | 2.2572982     |
| policy_loss        | -8.754432e-06 |
| serial_timesteps   | 789376        |
| time_elapsed       | 4.92e+04      |
| total_timesteps    | 789376        |
| value_loss         | 0.007899232   |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.4711146e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.29e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | -0.00431       |
| fps                | 9              |
| n_updates          | 6168           |
| policy_entropy     | 2.2587743      |
| policy_loss        | -0.00012768805 |
| se

-------------------------------------
| approxkl           | 4.423927e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.28e+03     |
| ep_reward_mean     | 4.06         |
| explained_variance | 0.0191       |
| fps                | 23           |
| n_updates          | 6181         |
| policy_entropy     | 2.1743302    |
| policy_loss        | 9.033363e-05 |
| serial_timesteps   | 791168       |
| time_elapsed       | 4.93e+04     |
| total_timesteps    | 791168       |
| value_loss         | 0.00153092   |
-------------------------------------
--------------------------------------
| approxkl           | 2.0629514e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | -0.00187      |
| fps                | 23            |
| n_updates          | 6182          |
| policy_entropy     | 2.1942344     |
| policy_loss        | -0.0001681447 |
| serial_timesteps   | 791296        |
|

Round done
--------------------------------------
| approxkl           | 4.1319174e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | 0.000585      |
| fps                | 10            |
| n_updates          | 6195          |
| policy_entropy     | 2.293386      |
| policy_loss        | -8.392148e-05 |
| serial_timesteps   | 792960        |
| time_elapsed       | 4.94e+04      |
| total_timesteps    | 792960        |
| value_loss         | 0.007694382   |
--------------------------------------
---------------------------------------
| approxkl           | 1.2172864e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | -0.00156       |
| fps                | 22             |
| n_updates          | 6196           |
| policy_entropy     | 2.2950516      |
| policy_loss        | -0.00031554815 |
| se

--------------------------------------
| approxkl           | 1.3666682e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | -0.00213      |
| fps                | 23            |
| n_updates          | 6209          |
| policy_entropy     | 2.2067823     |
| policy_loss        | -0.00031117   |
| serial_timesteps   | 794752        |
| time_elapsed       | 4.95e+04      |
| total_timesteps    | 794752        |
| value_loss         | 0.0051331436  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.4149394e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | -0.00246      |
| fps                | 10            |
| n_updates          | 6210          |
| policy_entropy     | 2.2040465     |
| policy_loss        | -0.0005248226 |
| serial_times

----------------------------------------
| approxkl           | 3.2882658e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.23e+03        |
| ep_reward_mean     | 3.99            |
| explained_variance | 0.00598         |
| fps                | 23              |
| n_updates          | 6223            |
| policy_entropy     | 2.2321966       |
| policy_loss        | -0.000118009746 |
| serial_timesteps   | 796544          |
| time_elapsed       | 4.97e+04        |
| total_timesteps    | 796544          |
| value_loss         | 0.007543272     |
----------------------------------------
---------------------------------------
| approxkl           | 2.335928e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 3.99           |
| explained_variance | -0.000107      |
| fps                | 23             |
| n_updates          | 6224           |
| policy_entropy     | 2.2387605      |
| policy_loss        | -0

---------------------------------------
| approxkl           | 6.7330893e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | 0.00014        |
| fps                | 22             |
| n_updates          | 6237           |
| policy_entropy     | 2.1916711      |
| policy_loss        | -0.00032302178 |
| serial_timesteps   | 798336         |
| time_elapsed       | 4.98e+04       |
| total_timesteps    | 798336         |
| value_loss         | 0.0076640924   |
---------------------------------------
---------------------------------------
| approxkl           | 9.407375e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | -2.68e-05      |
| fps                | 23             |
| n_updates          | 6238           |
| policy_entropy     | 2.2123575      |
| policy_loss        | -0.00053800736 |


----------------------------------------
| approxkl           | 2.5212894e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.23e+03        |
| ep_reward_mean     | 4.04            |
| explained_variance | 3.47e-05        |
| fps                | 22              |
| n_updates          | 6251            |
| policy_entropy     | 2.295393        |
| policy_loss        | -0.000103242695 |
| serial_timesteps   | 800128          |
| time_elapsed       | 4.99e+04        |
| total_timesteps    | 800128          |
| value_loss         | 0.0073713325    |
----------------------------------------
---------------------------------------
| approxkl           | 2.017033e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | 0.00219        |
| fps                | 22             |
| n_updates          | 6252           |
| policy_entropy     | 2.2996213      |
| policy_loss        | -0

---------------------------------------
| approxkl           | 4.942098e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | -0.00629       |
| fps                | 21             |
| n_updates          | 6265           |
| policy_entropy     | 2.2401981      |
| policy_loss        | -0.00024788827 |
| serial_timesteps   | 801920         |
| time_elapsed       | 5e+04          |
| total_timesteps    | 801920         |
| value_loss         | 0.0062363464   |
---------------------------------------
---------------------------------------
| approxkl           | 3.0591523e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | -0.000868      |
| fps                | 22             |
| n_updates          | 6266           |
| policy_entropy     | 2.2555473      |
| policy_loss        | -0.00014246255 |


Round done
--------------------------------------
| approxkl           | 1.0283211e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.02          |
| explained_variance | -7.24e-05     |
| fps                | 10            |
| n_updates          | 6279          |
| policy_entropy     | 2.2972016     |
| policy_loss        | 1.4591962e-05 |
| serial_timesteps   | 803712        |
| time_elapsed       | 5.01e+04      |
| total_timesteps    | 803712        |
| value_loss         | 0.0063255914  |
--------------------------------------
--------------------------------------
| approxkl           | 2.883764e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 4.02          |
| explained_variance | 0.000719      |
| fps                | 24            |
| n_updates          | 6280          |
| policy_entropy     | 2.297234      |
| policy_loss        | -4.418567e-05 |
| serial_times

---------------------------------------
| approxkl           | 1.7630116e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | 0.000255       |
| fps                | 24             |
| n_updates          | 6293           |
| policy_entropy     | 2.3117735      |
| policy_loss        | -0.00019004941 |
| serial_timesteps   | 805504         |
| time_elapsed       | 5.02e+04       |
| total_timesteps    | 805504         |
| value_loss         | 0.0056896037   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.031195e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | -0.00055       |
| fps                | 11             |
| n_updates          | 6294           |
| policy_entropy     | 2.3133776      |
| policy_loss        | -0.000

----------------------------------------
| approxkl           | 2.3988578e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.21e+03        |
| ep_reward_mean     | 4.02            |
| explained_variance | 0.000105        |
| fps                | 23              |
| n_updates          | 6307            |
| policy_entropy     | 2.3192987       |
| policy_loss        | -1.09826215e-05 |
| serial_timesteps   | 807296          |
| time_elapsed       | 5.04e+04        |
| total_timesteps    | 807296          |
| value_loss         | 0.0042072795    |
----------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 1.236564e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | 0.351         |
| fps                | 8             |
| n_updates          | 6308          |


--------------------------------------
| approxkl           | 5.6526223e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | -0.00188      |
| fps                | 22            |
| n_updates          | 6321          |
| policy_entropy     | 2.340374      |
| policy_loss        | 6.2942505e-05 |
| serial_timesteps   | 809088        |
| time_elapsed       | 5.05e+04      |
| total_timesteps    | 809088        |
| value_loss         | 0.006470098   |
--------------------------------------
--------------------------------------
| approxkl           | 1.0888582e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | -0.00108      |
| fps                | 23            |
| n_updates          | 6322          |
| policy_entropy     | 2.3437726     |
| policy_loss        | 4.2915344e-06 |
| serial_timesteps   | 80

---------------------------------------
| approxkl           | 3.7618017e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 8.06e-05       |
| fps                | 20             |
| n_updates          | 6335           |
| policy_entropy     | 2.3470652      |
| policy_loss        | -4.0284358e-05 |
| serial_timesteps   | 810880         |
| time_elapsed       | 5.06e+04       |
| total_timesteps    | 810880         |
| value_loss         | 0.006902163    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 4.52091e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.00103      |
| fps                | 9             |
| n_updates          | 6336          |
| policy_entropy     | 2.3486462     |
| policy_loss        | -9.820983e-05 |

Round done
--------------------------------------
| approxkl           | 3.291768e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.00025      |
| fps                | 10            |
| n_updates          | 6349          |
| policy_entropy     | 2.3598816     |
| policy_loss        | 1.3895333e-06 |
| serial_timesteps   | 812672        |
| time_elapsed       | 5.07e+04      |
| total_timesteps    | 812672        |
| value_loss         | 0.0051767402  |
--------------------------------------
--------------------------------------
| approxkl           | 6.6711004e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.000126     |
| fps                | 23            |
| n_updates          | 6350          |
| policy_entropy     | 2.360276      |
| policy_loss        | -8.657947e-05 |
| serial_times

Round done
----------------------------------------
| approxkl           | 1.1187145e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.28e+03        |
| ep_reward_mean     | 4.17            |
| explained_variance | -0.000204       |
| fps                | 10              |
| n_updates          | 6363            |
| policy_entropy     | 2.3563972       |
| policy_loss        | -0.000117902644 |
| serial_timesteps   | 814464          |
| time_elapsed       | 5.08e+04        |
| total_timesteps    | 814464          |
| value_loss         | 0.005121654     |
----------------------------------------
---------------------------------------
| approxkl           | 8.4233943e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | 0.00144        |
| fps                | 22             |
| n_updates          | 6364           |
| policy_entropy     | 2.3563914      |
| policy_loss 

---------------------------------------
| approxkl           | 1.4867767e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | 0.00338        |
| fps                | 22             |
| n_updates          | 6377           |
| policy_entropy     | 2.3211384      |
| policy_loss        | -0.00010464154 |
| serial_timesteps   | 816256         |
| time_elapsed       | 5.09e+04       |
| total_timesteps    | 816256         |
| value_loss         | 0.006312942    |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 4.3742403e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.2           |
| explained_variance | 0.00362       |
| fps                | 10            |
| n_updates          | 6378          |
| policy_entropy     | 2.3276408     |
| policy_loss        | 8.50521e-05   |

Stage done
---------------------------------------
| approxkl           | 2.6069023e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.2            |
| explained_variance | -0.000311      |
| fps                | 9              |
| n_updates          | 6391           |
| policy_entropy     | 2.3523853      |
| policy_loss        | -6.0658902e-05 |
| serial_timesteps   | 818048         |
| time_elapsed       | 5.11e+04       |
| total_timesteps    | 818048         |
| value_loss         | 0.0022530416   |
---------------------------------------
--------------------------------------
| approxkl           | 5.3744947e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.2           |
| explained_variance | -0.000985     |
| fps                | 23            |
| n_updates          | 6392          |
| policy_entropy     | 2.352376      |
| policy_loss        | -7.982552e-05 |

--------------------------------------
| approxkl           | 2.9955006e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | -0.000747     |
| fps                | 23            |
| n_updates          | 6405          |
| policy_entropy     | 2.352687      |
| policy_loss        | 7.5249e-05    |
| serial_timesteps   | 819840        |
| time_elapsed       | 5.12e+04      |
| total_timesteps    | 819840        |
| value_loss         | 0.007750557   |
--------------------------------------
---------------------------------------
| approxkl           | 2.0277957e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.19           |
| explained_variance | 0.00653        |
| fps                | 23             |
| n_updates          | 6406           |
| policy_entropy     | 2.3571079      |
| policy_loss        | -0.00012025982 |
| serial_timest

--------------------------------------
| approxkl           | 5.77065e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.19          |
| explained_variance | -0.000551     |
| fps                | 23            |
| n_updates          | 6419          |
| policy_entropy     | 2.3734365     |
| policy_loss        | 2.8107315e-05 |
| serial_timesteps   | 821632        |
| time_elapsed       | 5.13e+04      |
| total_timesteps    | 821632        |
| value_loss         | 0.002607607   |
--------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 1.4875e-06    |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.16          |
| explained_variance | 0.0117        |
| fps                | 7             |
| n_updates          | 6420          |
| policy_entropy     | 2.36361

Stage done
--------------------------------------
| approxkl           | 1.7800807e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.16          |
| explained_variance | 0.00113       |
| fps                | 9             |
| n_updates          | 6433          |
| policy_entropy     | 2.3758001     |
| policy_loss        | -9.804964e-06 |
| serial_timesteps   | 823424        |
| time_elapsed       | 5.14e+04      |
| total_timesteps    | 823424        |
| value_loss         | 0.0034580398  |
--------------------------------------
---------------------------------------
| approxkl           | 1.8869392e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.16           |
| explained_variance | 0.00106        |
| fps                | 23             |
| n_updates          | 6434           |
| policy_entropy     | 2.3758717      |
| policy_loss        | -2.3234636e-05 |
| se

Round done
---------------------------------------
| approxkl           | 6.460842e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | 0.00773        |
| fps                | 11             |
| n_updates          | 6447           |
| policy_entropy     | 2.3105216      |
| policy_loss        | -0.00016138516 |
| serial_timesteps   | 825216         |
| time_elapsed       | 5.15e+04       |
| total_timesteps    | 825216         |
| value_loss         | 0.003967367    |
---------------------------------------
---------------------------------------
| approxkl           | 3.2765304e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.003         |
| fps                | 23             |
| n_updates          | 6448           |
| policy_entropy     | 2.3381212      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 2.055549e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | 0.000711       |
| fps                | 22             |
| n_updates          | 6461           |
| policy_entropy     | 2.3836806      |
| policy_loss        | -7.5483695e-05 |
| serial_timesteps   | 827008         |
| time_elapsed       | 5.16e+04       |
| total_timesteps    | 827008         |
| value_loss         | 0.004153771    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.611448e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | -0.000441      |
| fps                | 8              |
| n_updates          | 6462           |
| policy_entropy     | 2.3841047      |
| policy_loss        | -1.601

---------------------------------------
| approxkl           | 2.8336058e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | -0.00802       |
| fps                | 23             |
| n_updates          | 6475           |
| policy_entropy     | 2.345406       |
| policy_loss        | -0.00015107938 |
| serial_timesteps   | 828800         |
| time_elapsed       | 5.17e+04       |
| total_timesteps    | 828800         |
| value_loss         | 0.006357971    |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 1.5057421e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | -0.00945      |
| fps                | 10            |
| n_updates          | 6476          |
| policy_entropy     | 2.359567      |
| policy_loss        | 1.5832484e-05 |

Stage done
---------------------------------------
| approxkl           | 5.715495e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.17           |
| explained_variance | 0.000354       |
| fps                | 8              |
| n_updates          | 6489           |
| policy_entropy     | 2.3890066      |
| policy_loss        | -0.00013544224 |
| serial_timesteps   | 830592         |
| time_elapsed       | 5.18e+04       |
| total_timesteps    | 830592         |
| value_loss         | 0.009267721    |
---------------------------------------
--------------------------------------
| approxkl           | 1.0322676e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.17          |
| explained_variance | 0.000447      |
| fps                | 21            |
| n_updates          | 6490          |
| policy_entropy     | 2.3882542     |
| policy_loss        | -9.955838e-05 |

--------------------------------------
| approxkl           | 5.800758e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.15          |
| explained_variance | -0.0116       |
| fps                | 22            |
| n_updates          | 6503          |
| policy_entropy     | 2.3087769     |
| policy_loss        | -0.0004417561 |
| serial_timesteps   | 832384        |
| time_elapsed       | 5.2e+04       |
| total_timesteps    | 832384        |
| value_loss         | 0.006608811   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 8.683595e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.15           |
| explained_variance | 0.00121        |
| fps                | 10             |
| n_updates          | 6504           |
| policy_entropy     | 2.3370495      |
| policy_loss        | -0.00017616898 |
| se

--------------------------------------
| approxkl           | 3.1533108e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.00135       |
| fps                | 23            |
| n_updates          | 6517          |
| policy_entropy     | 2.3769581     |
| policy_loss        | 6.221235e-06  |
| serial_timesteps   | 834176        |
| time_elapsed       | 5.21e+04      |
| total_timesteps    | 834176        |
| value_loss         | 0.0036497978  |
--------------------------------------
---------------------------------------
| approxkl           | 3.85068e-07    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 0.00137        |
| fps                | 23             |
| n_updates          | 6518           |
| policy_entropy     | 2.3769841      |
| policy_loss        | -0.00018416159 |
| serial_timest

--------------------------------------
| approxkl           | 2.7168726e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.000105      |
| fps                | 22            |
| n_updates          | 6531          |
| policy_entropy     | 2.363995      |
| policy_loss        | -4.608743e-05 |
| serial_timesteps   | 835968        |
| time_elapsed       | 5.22e+04      |
| total_timesteps    | 835968        |
| value_loss         | 0.0046270844  |
--------------------------------------
---------------------------------------
| approxkl           | 2.4525643e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 4.4e-05        |
| fps                | 23             |
| n_updates          | 6532           |
| policy_entropy     | 2.361913       |
| policy_loss        | -0.00024478696 |
| serial_timest

---------------------------------------
| approxkl           | 1.1975494e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | 0.00143        |
| fps                | 20             |
| n_updates          | 6545           |
| policy_entropy     | 2.3451278      |
| policy_loss        | -0.00020072237 |
| serial_timesteps   | 837760         |
| time_elapsed       | 5.23e+04       |
| total_timesteps    | 837760         |
| value_loss         | 0.0031981731   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 8.6451587e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | -0.000767     |
| fps                | 9             |
| n_updates          | 6546          |
| policy_entropy     | 2.3459613     |
| policy_loss        | -9.934604e-05 |

---------------------------------------
| approxkl           | 8.711768e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | 0.00167        |
| fps                | 21             |
| n_updates          | 6559           |
| policy_entropy     | 2.3550553      |
| policy_loss        | -8.8441186e-05 |
| serial_timesteps   | 839552         |
| time_elapsed       | 5.24e+04       |
| total_timesteps    | 839552         |
| value_loss         | 0.0054795276   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 6.6193604e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.13           |
| explained_variance | 0.00151        |
| fps                | 10             |
| n_updates          | 6560           |
| policy_entropy     | 2.3549576      |
| policy_loss        | -1.643

--------------------------------------
| approxkl           | 2.6120185e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.13          |
| explained_variance | -0.000334     |
| fps                | 23            |
| n_updates          | 6573          |
| policy_entropy     | 2.368257      |
| policy_loss        | 4.503876e-06  |
| serial_timesteps   | 841344        |
| time_elapsed       | 5.26e+04      |
| total_timesteps    | 841344        |
| value_loss         | 0.0054842616  |
--------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
-------------------------------------
| approxkl           | 6.375276e-07 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.23e+03     |
| ep_reward_mean     | 4.04         |
| explained_variance | 0.0431       |
| fps                | 8            |
| n_updates          | 6574         |
| policy_entropy     | 2.354268     |


--------------------------------------
| approxkl           | 1.4773326e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | -0.00021      |
| fps                | 23            |
| n_updates          | 6587          |
| policy_entropy     | 2.354459      |
| policy_loss        | -0.0002020523 |
| serial_timesteps   | 843136        |
| time_elapsed       | 5.27e+04      |
| total_timesteps    | 843136        |
| value_loss         | 0.010309586   |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 1.8341467e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | -0.00205       |
| fps                | 10             |
| n_updates          | 6588           |
| policy_entropy     | 2.3529413      |
| policy_loss        | -0.00010332465 |
| se

---------------------------------------
| approxkl           | 2.5990892e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | 0.00441        |
| fps                | 22             |
| n_updates          | 6601           |
| policy_entropy     | 2.3375492      |
| policy_loss        | -0.00018807873 |
| serial_timesteps   | 844928         |
| time_elapsed       | 5.28e+04       |
| total_timesteps    | 844928         |
| value_loss         | 0.0034098884   |
---------------------------------------
---------------------------------------
| approxkl           | 1.9255353e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | 0.006          |
| fps                | 22             |
| n_updates          | 6602           |
| policy_entropy     | 2.337173       |
| policy_loss        | -0.00014581252 |


--------------------------------------
| approxkl           | 2.4784292e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4             |
| explained_variance | 0.0113        |
| fps                | 22            |
| n_updates          | 6615          |
| policy_entropy     | 2.3316655     |
| policy_loss        | -0.0003100671 |
| serial_timesteps   | 846720        |
| time_elapsed       | 5.29e+04      |
| total_timesteps    | 846720        |
| value_loss         | 0.0033284973  |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 1.913679e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4             |
| explained_variance | 0.00287       |
| fps                | 10            |
| n_updates          | 6616          |
| policy_entropy     | 2.3367488     |
| policy_loss        | 5.2690506e-05 |
| serial_times

Round done
--------------------------------------
| approxkl           | 1.7725932e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4             |
| explained_variance | -0.000407     |
| fps                | 10            |
| n_updates          | 6629          |
| policy_entropy     | 2.3379636     |
| policy_loss        | -7.83205e-05  |
| serial_timesteps   | 848512        |
| time_elapsed       | 5.3e+04       |
| total_timesteps    | 848512        |
| value_loss         | 0.005392245   |
--------------------------------------
---------------------------------------
| approxkl           | 1.297106e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4              |
| explained_variance | 0.000233       |
| fps                | 22             |
| n_updates          | 6630           |
| policy_entropy     | 2.3374538      |
| policy_loss        | -0.00016903505 |
| se

In [None]:
# Persist the trained agent to disk; the file name is the model folder prefix
# followed by a suffix identifying this training run.
model.save(save_path=modelFolder + "2Msteps_action+_add")

In [None]:
%tensorboard --logdir ./tensorBoardFolder/

In [None]:
# Interactive step-through of the trained agent: at every step print the
# policy's action distribution and the sampled action, then wait for Enter
# before advancing the environment.
# NOTE: the loop has no exit condition; stop it by interrupting the kernel.
observation = env.reset()
states = None

while True:

    # Query the probabilities with the recurrent state that predict() is about
    # to consume. The state returned by predict() has already been advanced
    # past this observation, so using it here would report a distribution
    # different from the one the action is sampled from.
    action_prob = model.action_probability(observation, states)
    action, states = model.predict(observation, states, deterministic=False)
    print("Action probabilities = ", action_prob)
    print("Max action = ", np.argmax(action_prob))
    print("Action = ", action)
    input("Pausa")

    observation, reward, done, info = env.step(action)
    # np.any() also handles an array-valued done flag (as in the evaluation
    # cell below) and behaves like the plain truth test for a scalar.
    if np.any(done):
        observation = env.reset()
        # Restart the recurrent state together with the episode.
        states = None

# Not reached while the loop above has no exit condition.
env.close()

In [None]:
# Evaluate the trained agent for `maxNumEp` episodes and report the mean and
# standard deviation of the per-episode cumulative reward.
observation = env.reset()
states = None

cumulativeEpRew = 0.0      # reward accumulated during the current episode
cumulativeEpRewAll = []    # one entry per finished episode
cumulativeTotRew = 0.0     # sum of the rewards of all finished episodes

maxNumEp = 100
currNumEp = 0

while currNumEp < maxNumEp:

    # Query the probabilities with the recurrent state that predict() is about
    # to consume. The state returned by predict() has already been advanced
    # past this observation, so using it here would report a distribution
    # different from the one the action is sampled from.
    action_prob = model.action_probability(observation, states)
    action, states = model.predict(observation, states, deterministic=False)
    print("Action probabilities = ", action_prob)
    print("Max action = ", np.argmax(action_prob))
    print("Action = ", action)

    observation, reward, done, info = env.step(action)

    cumulativeEpRew += reward

    if np.any(done):
        # Episode finished: record its total and start a new one.
        currNumEp += 1
        print("Ep. # = ", currNumEp)
        print("Ep. Cumulative Rew # = ", cumulativeEpRew)
        sys.stdout.flush()
        cumulativeEpRewAll.append(cumulativeEpRew)
        cumulativeTotRew += cumulativeEpRew
        cumulativeEpRew = 0.0

        observation = env.reset()
        # Restart the recurrent state together with the episode.
        states = None

# The mean is reported twice on purpose: once from the running total and once
# from the list of per-episode values.
print("Mean cumulative reward = ", cumulativeTotRew/maxNumEp)
print("Mean cumulative reward = ", np.mean(cumulativeEpRewAll))
print("Std cumulative reward = ", np.std(cumulativeEpRewAll))

env.close()