In [1]:
# Name of the game sub-folder; the next cell joins it under '../../../games'
# and appends the resulting directory to sys.path.
gameFolder = "DOA++-MAME"

In [2]:
import sys, os
import time
# Time-dependent seed: the wall clock minus its (shifted) integer part, scaled
# by 1000, which yields an integer of roughly 500..1500.
timeDepSeed = int(1000 * (time.time() - int(time.time() - 0.5)))

# Make the parent directory and the selected game's directory importable.
# os.path.abspath('') resolves to the current working directory.
sys.path.extend([
    os.path.join(os.path.abspath(''), '../'),
    os.path.join(os.path.abspath(''), '../../../games', gameFolder),
])

# TensorBoard log directory and the directory prefix used for model files.
modelFolder = "ppo2_Model_CustCnnLnLstm/"
tensorBoardFolder = "./ppo2_TB_CustCnnLnLstm/"

In [3]:
from makeDiambraEnv import *

import tensorflow as tf

%load_ext tensorboard

from stable_baselines.common.policies import RecurrentActorCriticPolicy
from stable_baselines.common.policies import *
from stable_baselines import PPO2
from stable_baselines.common.evaluation import evaluate_policy
from stable_baselines.common.callbacks import BaseCallback

In [4]:
class CustomCnnLstmPolicy(RecurrentActorCriticPolicy):
    """
    Recurrent actor-critic policy whose LSTM is fed with CNN features extracted from the
    stacked frames, concatenated with dense features computed from an extra observation channel.

    The processed observation is split along its last axis: channels 0-3 go through the CNN
    extractor, channel 4 is flattened and its elements 1..148 form the "additional info" vector.

    :param sess: (TensorFlow session) The current TensorFlow session
    :param ob_space: (Gym Space) The observation space of the environment
    :param ac_space: (Gym Space) The action space of the environment
    :param n_env: (int) The number of environments to run
    :param n_steps: (int) The number of steps to run for each environment
    :param n_batch: (int) The number of batch to run (n_envs * n_steps)
    :param n_lstm: (int) The number of LSTM cells (for recurrent policies)
    :param reuse: (bool) If the policy is reusable or not
    :param layers: ([int]) Sizes of the dense layers applied to the additional-info vector
        (if None, default to [64, 64]). Must not be empty. See the NOTE(review) in ``__init__``:
        every layer reads the raw additional-info vector and only the last layer's output is used.
    :param net_arch: (list) Accepted for signature compatibility only; this policy never reads it
        and emits a warning when it is provided.
    :param act_fun: (tf.func) the activation function to use in the neural network.
    :param cnn_extractor: (function (TensorFlow Tensor, ``**kwargs``): (TensorFlow Tensor)) the CNN feature extraction
    :param layer_norm: (bool) Whether or not to use layer normalizing LSTMs
    :param feature_extraction: (str) The feature extraction type ("cnn" or "mlp"); here it only
        controls observation scaling and the keyword-argument check
    :param kwargs: (dict) Extra keyword arguments forwarded to ``cnn_extractor``
    :raises ValueError: if ``layers`` is an empty sequence
    """

    recurrent = True

    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, n_lstm=256, reuse=False,
                 layers=None, net_arch=None, act_fun=tf.tanh, cnn_extractor=nature_cnn,
                 layer_norm=True, feature_extraction="cnn", **kwargs):

        # state_shape = [n_lstm * 2] dim because of the cell and hidden states of the LSTM
        super(CustomCnnLstmPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                                  state_shape=(2 * n_lstm, ), reuse=reuse,
                                                  scale=(feature_extraction == "cnn"))

        self._kwargs_check(feature_extraction, kwargs)

        # net_arch is never read below, so tell the caller instead of silently ignoring it.
        if net_arch is not None:
            warnings.warn("The net_arch parameter is ignored by CustomCnnLstmPolicy. "
                          "Use the layers parameter instead.")

        if layers is None:
            layers = [64, 64]
        elif len(layers) == 0:
            # Without at least one layer the additional-info features would never be defined.
            raise ValueError("The layers parameter must contain at least one layer size.")

        # Channels 0-3 are treated as the image input of the CNN
        # (assumes a frame stack of 4 - TODO confirm against the env wrappers).
        frames = self.processed_obs[:, :, :, 0:4]
        # Channel 4 carries the additional info; only flattened elements 1..148 are used.
        additional_input = self.processed_obs[:, :, :, 4]
        additional_input = tf.layers.flatten(additional_input)
        additional_input = additional_input[:, 1:149]

        with tf.variable_scope("model", reuse=reuse):

            # Frames (CNN)
            extracted_features_frames = cnn_extractor(frames, **kwargs)

            # Additional (Additional Info)
            # NOTE(review): each iteration takes `additional_input` (not the previous layer's
            # output) as input and overwrites `extracted_features_addinfo`, so the layers are
            # not chained and only the last one feeds the LSTM. Kept as-is on purpose: chaining
            # them would change the variable shapes, so previously saved models would presumably
            # no longer load - confirm before changing.
            for i, layer_size in enumerate(layers):
                extracted_features_addinfo = act_fun(linear(additional_input, 'pi_fc' + str(i),
                                                            n_hidden=layer_size, init_scale=np.sqrt(2)))

            extracted_features = tf.concat([extracted_features_frames, extracted_features_addinfo], 1)

            # Reshape the flat batch into per-step sequences for the LSTM, run it, flatten back.
            input_sequence = batch_to_seq(extracted_features, self.n_env, n_steps)
            masks = batch_to_seq(self.dones_ph, self.n_env, n_steps)
            rnn_output, self.snew = lstm(input_sequence, masks, self.states_ph, 'lstm1', n_hidden=n_lstm,
                                         layer_norm=layer_norm)
            rnn_output = seq_to_batch(rnn_output)
            value_fn = linear(rnn_output, 'vf', 1)

            # The same LSTM output is used as latent vector for both the policy and the value head.
            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(rnn_output, rnn_output)

        self._value_fn = value_fn

        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=False):
        """
        Evaluate the policy for one step.

        :param obs: value fed to the observation placeholder
        :param state: value fed to the LSTM state placeholder
        :param mask: value fed to the dones placeholder
        :param deterministic: (bool) use the deterministic action op instead of the sampled one
        :return: the session results for [action, value_flat, snew, neglogp]
        """
        action_op = self.deterministic_action if deterministic else self.action
        return self.sess.run([action_op, self.value_flat, self.snew, self.neglogp],
                             {self.obs_ph: obs, self.states_ph: state, self.dones_ph: mask})

    def proba_step(self, obs, state=None, mask=None):
        """Return the session result for ``policy_proba`` given observation, state and mask."""
        return self.sess.run(self.policy_proba, {self.obs_ph: obs, self.states_ph: state, self.dones_ph: mask})

    def value(self, obs, state=None, mask=None):
        """Return the session result for ``value_flat`` given observation, state and mask."""
        return self.sess.run(self.value_flat, {self.obs_ph: obs, self.states_ph: state, self.dones_ph: mask})


In [5]:
# Keyword arguments passed to make_diambra_env as `diambra_kwargs`.
# Commented entries are optional settings that are currently disabled.
diambraKwargs = {
    "roms_path": "../../../roms/MAMEToolkit/roms/",
    "binary_path": "../../../../customMAME/",
    "player": "P1",
    "frame_ratio": 3,
    #"render": True,
    #"throttle": False,
    #"sound": False,
    #"character": "Random",
    "character": "Kasumi",
}

# Keyword arguments passed to make_diambra_env as `wrapper_kwargs`.
wrapperKwargs = {
    "frame_stack": 4,
    "clip_rewards": False,
    "normalize_rewards": True,
    "scale": True,
    "hw_obs_resize": [256, 256],
}

# Keys passed as `key_to_add` (use `keyToAdd = None` to pass none).
# Commented entries are available-but-disabled keys.
keyToAdd = [
    "actionsBuf",
    #"player",
    "healthP1",
    "healthP2",
    "positionP1",
    "positionP2",
    #"winsP1",
    #"winsP2",
]

numEnv = 1

env = make_diambra_env(
    diambraMame,
    env_prefix="Train",
    num_env=numEnv,
    seed=timeDepSeed,
    continue_game=-2,
    diambra_kwargs=diambraKwargs,
    wrapper_kwargs=wrapperKwargs,
    key_to_add=keyToAdd,
)

# OR
#env = make_diambra_env(diambraMame, num_env=2, seed=0, diambra_kwargs = diambraKwargs)
# Frame-stacking with 4 frames
#env = VecFrameStack(env, n_stack=4)


Env_id =  Train0
Continue rule =  False
Player = P1 , Character = Kasumi
Noop action N =  11


In [None]:
# Inspect the observation space exposed by the environment.
obsSpace = env.observation_space
print("Obs_space = ", obsSpace)
print("Obs_space type = ", obsSpace.dtype)
print("Obs_space high = ", obsSpace.high)
print("Obs_space low = ", obsSpace.low)

In [None]:
# Inspect the action space exposed by the environment.
actSpace = env.action_space
print("Act_space = ", actSpace)
print("Act_space type = ", actSpace.dtype)
print("Act_space n = ", actSpace.n)

In [6]:
# Keyword arguments handed to the policy class through `policy_kwargs`.
policyKwargs = dict(layers=[128, 256])

# Alternative: initialize a fresh model, 1 env
#model = PPO2(CustomCnnLstmPolicy, env, nminibatches=1, verbose=1,
#             tensorboard_log=tensorBoardFolder, policy_kwargs=policyKwargs, gamma = 0.9)

# Load the trained agent, 1 env
model = PPO2.load(
    modelFolder + "4_3Msteps_action+_add",
    env=env,
    tensorboard_log=tensorBoardFolder,
    policy_kwargs=policyKwargs,
    gamma=0.94,
)





Instructions for updating:
Use keras.layers.flatten instead.





Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




In [None]:
# Train the agent for ten million timesteps
time_steps = 10 ** 7
model.learn(total_timesteps=time_steps)


Setting difficulty = 3
Starting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 2.5212782e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.0804         |
| fps                | 2              |
| n_updates          | 1              |
| policy_entropy     | 2.3138862      |
| policy_loss        | -0.00044920295 |
| serial_timesteps   | 128            |
| time_elapsed       | 3.65e-05       |
| total_timesteps    | 128            |
| value_loss         | 0.0015072183   |
---------------------------------------
--------------------------------------
| approxkl           | 1.5704218e-06 |
| clipfrac           | 0.0           |
| explained_variance | -0.0114       |
| fps                | 23            |
| n_updates          | 2             |
| policy_entropy     | 2.358452      |
| policy_loss        | -0.0002909582 |
| serial_timesteps   | 256           |
| time_elapsed       | 45.3          |
| total_

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 1.4725254e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.12e+03       |
| ep_reward_mean     | 1.21           |
| explained_variance | 0.202          |
| fps                | 8              |
| n_updates          | 17             |
| policy_entropy     | 2.3247085      |
| policy_loss        | -0.00018822774 |
| serial_timesteps   | 2176           |
| time_elapsed       | 159            |
| total_timesteps    | 2176           |
| value_loss         | 0.013077898    |
---------------------------------------
---------------------------------------
| approxkl           | 1.9347656e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.12e+03       |
| ep_reward_mean     | 1.21           |
| explained_variance | 0.0171         |
| fps                | 23             |
| n_updates          | 18             |
| polic

--------------------------------------
| approxkl           | 3.4124795e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.12e+03      |
| ep_reward_mean     | 1.21          |
| explained_variance | -5.95e-05     |
| fps                | 21            |
| n_updates          | 31            |
| policy_entropy     | 2.3631635     |
| policy_loss        | -0.0001604706 |
| serial_timesteps   | 3968          |
| time_elapsed       | 283           |
| total_timesteps    | 3968          |
| value_loss         | 0.007088354   |
--------------------------------------
--------------------------------------
| approxkl           | 4.9124344e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.12e+03      |
| ep_reward_mean     | 1.21          |
| explained_variance | 0.00174       |
| fps                | 23            |
| n_updates          | 32            |
| policy_entropy     | 2.362805      |
| policy_loss        | -8.691102e-05 |
| serial_timesteps   | 40

--------------------------------------
| approxkl           | 5.4171375e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 1.74e+03      |
| ep_reward_mean     | 1.78          |
| explained_variance | -0.00314      |
| fps                | 24            |
| n_updates          | 45            |
| policy_entropy     | 2.3594697     |
| policy_loss        | -8.163694e-05 |
| serial_timesteps   | 5760          |
| time_elapsed       | 400           |
| total_timesteps    | 5760          |
| value_loss         | 0.0015930282  |
--------------------------------------
---------------------------------------
| approxkl           | 4.2738517e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 1.74e+03       |
| ep_reward_mean     | 1.78           |
| explained_variance | 0.0112         |
| fps                | 23             |
| n_updates          | 46             |
| policy_entropy     | 2.36108        |
| policy_loss        | -4.2196363e-05 |
| serial_timest

--------------------------------------
| approxkl           | 3.3337687e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 1.74e+03      |
| ep_reward_mean     | 1.78          |
| explained_variance | -0.00161      |
| fps                | 23            |
| n_updates          | 59            |
| policy_entropy     | 2.3672385     |
| policy_loss        | -8.16565e-05  |
| serial_timesteps   | 7552          |
| time_elapsed       | 502           |
| total_timesteps    | 7552          |
| value_loss         | 0.008260985   |
--------------------------------------
--------------------------------------
| approxkl           | 2.8108443e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 1.74e+03      |
| ep_reward_mean     | 1.78          |
| explained_variance | 0.00285       |
| fps                | 23            |
| n_updates          | 60            |
| policy_entropy     | 2.3666558     |
| policy_loss        | -3.874302e-06 |
| serial_timesteps   | 76

---------------------------------------
| approxkl           | 5.2998723e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 1.74e+03       |
| ep_reward_mean     | 1.78           |
| explained_variance | -5.89e-05      |
| fps                | 23             |
| n_updates          | 73             |
| policy_entropy     | 2.369536       |
| policy_loss        | -0.00013098493 |
| serial_timesteps   | 9344           |
| time_elapsed       | 608            |
| total_timesteps    | 9344           |
| value_loss         | 0.007242611    |
---------------------------------------
--------------------------------------
| approxkl           | 4.1254344e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 1.74e+03      |
| ep_reward_mean     | 1.78          |
| explained_variance | 0.000121      |
| fps                | 22            |
| n_updates          | 74            |
| policy_entropy     | 2.368696      |
| policy_loss        | -7.919036e-05 |
| serial_t

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 1.158209e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.77e+03       |
| ep_reward_mean     | 2.95           |
| explained_variance | 0.244          |
| fps                | 8              |
| n_updates          | 87             |
| policy_entropy     | 2.3186707      |
| policy_loss        | -0.00024767406 |
| serial_timesteps   | 11136          |
| time_elapsed       | 714            |
| total_timesteps    | 11136          |
| value_loss         | 0.0048559653   |
---------------------------------------
--------------------------------------
| approxkl           | 7.217251e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.77e+03      |
| ep_reward_mean     | 2.95          |
| explained_variance | -0.00715      |
| fps                | 23            |
| n_updates          | 88            |
| policy_entrop

---------------------------------------
| approxkl           | 1.4953491e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.77e+03       |
| ep_reward_mean     | 2.95           |
| explained_variance | -0.0016        |
| fps                | 23             |
| n_updates          | 101            |
| policy_entropy     | 2.3629146      |
| policy_loss        | -3.9699487e-05 |
| serial_timesteps   | 12928          |
| time_elapsed       | 838            |
| total_timesteps    | 12928          |
| value_loss         | 0.0022261692   |
---------------------------------------
---------------------------------------
| approxkl           | 8.8129366e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.77e+03       |
| ep_reward_mean     | 2.95           |
| explained_variance | 0.00165        |
| fps                | 23             |
| n_updates          | 102            |
| policy_entropy     | 2.3647         |
| policy_loss        | -2.7567148e-05 |


Stage done
---------------------------------------
| approxkl           | 7.762151e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.8e+03        |
| ep_reward_mean     | 3.65           |
| explained_variance | 0.014          |
| fps                | 9              |
| n_updates          | 115            |
| policy_entropy     | 2.3469932      |
| policy_loss        | -3.9938837e-05 |
| serial_timesteps   | 14720          |
| time_elapsed       | 951            |
| total_timesteps    | 14720          |
| value_loss         | 0.0041873585   |
---------------------------------------
---------------------------------------
| approxkl           | 9.147632e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.8e+03        |
| ep_reward_mean     | 3.65           |
| explained_variance | 0.0111         |
| fps                | 23             |
| n_updates          | 116            |
| policy_entropy     | 2.3477063      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.4824018e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.63e+03      |
| ep_reward_mean     | 3.61          |
| explained_variance | 0.0168        |
| fps                | 23            |
| n_updates          | 129           |
| policy_entropy     | 2.3342707     |
| policy_loss        | -6.311014e-05 |
| serial_timesteps   | 16512         |
| time_elapsed       | 1.06e+03      |
| total_timesteps    | 16512         |
| value_loss         | 0.005711567   |
--------------------------------------
---------------------------------------
| approxkl           | 1.5061967e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.63e+03       |
| ep_reward_mean     | 3.61           |
| explained_variance | 0.00571        |
| fps                | 23             |
| n_updates          | 130            |
| policy_entropy     | 2.3347306      |
| policy_loss        | -0.00013340684 |
| serial_timest

---------------------------------------
| approxkl           | 1.8018595e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.63e+03       |
| ep_reward_mean     | 3.61           |
| explained_variance | -0.000552      |
| fps                | 23             |
| n_updates          | 143            |
| policy_entropy     | 2.3365967      |
| policy_loss        | -5.1893294e-05 |
| serial_timesteps   | 18304          |
| time_elapsed       | 1.17e+03       |
| total_timesteps    | 18304          |
| value_loss         | 0.004783319    |
---------------------------------------
---------------------------------------
| approxkl           | 1.1053794e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.63e+03       |
| ep_reward_mean     | 3.61           |
| explained_variance | 0.00029        |
| fps                | 24             |
| n_updates          | 144            |
| policy_entropy     | 2.3353086      |
| policy_loss        | -0.00020148978 |


Round done
--------------------------------------
| approxkl           | 6.410572e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.7e+03       |
| ep_reward_mean     | 3.42          |
| explained_variance | 0.00494       |
| fps                | 10            |
| n_updates          | 157           |
| policy_entropy     | 2.3280363     |
| policy_loss        | -6.377511e-05 |
| serial_timesteps   | 20096         |
| time_elapsed       | 1.27e+03      |
| total_timesteps    | 20096         |
| value_loss         | 0.009364916   |
--------------------------------------
---------------------------------------
| approxkl           | 5.5193743e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.7e+03        |
| ep_reward_mean     | 3.42           |
| explained_variance | -0.00486       |
| fps                | 24             |
| n_updates          | 158            |
| policy_entropy     | 2.3300204      |
| policy_loss        | -1.7600134e-05 |
| se

---------------------------------------
| approxkl           | 2.4238642e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.7e+03        |
| ep_reward_mean     | 3.42           |
| explained_variance | -0.00105       |
| fps                | 24             |
| n_updates          | 171            |
| policy_entropy     | 2.331492       |
| policy_loss        | -6.4991415e-05 |
| serial_timesteps   | 21888          |
| time_elapsed       | 1.38e+03       |
| total_timesteps    | 21888          |
| value_loss         | 0.006778884    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.107015e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.7e+03        |
| ep_reward_mean     | 3.42           |
| explained_variance | -0.00201       |
| fps                | 9              |
| n_updates          | 172            |
| policy_entropy     | 2.3312159      |
| policy_loss        | -4.570

---------------------------------------
| approxkl           | 7.263109e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.7e+03        |
| ep_reward_mean     | 3.42           |
| explained_variance | 0.000151       |
| fps                | 23             |
| n_updates          | 185            |
| policy_entropy     | 2.3320758      |
| policy_loss        | -0.00014389865 |
| serial_timesteps   | 23680          |
| time_elapsed       | 1.49e+03       |
| total_timesteps    | 23680          |
| value_loss         | 0.0074818074   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 4.6910844e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.7e+03       |
| ep_reward_mean     | 3.42          |
| explained_variance | -0.000155     |
| fps                | 10            |
| n_updates          | 186           |
| policy_entropy     | 2.3311286     |
| policy_loss        | 3.4141354e-05 |

Round done
--------------------------------------
| approxkl           | 7.666865e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.7e+03       |
| ep_reward_mean     | 3.42          |
| explained_variance | -0.000315     |
| fps                | 10            |
| n_updates          | 199           |
| policy_entropy     | 2.3359241     |
| policy_loss        | -9.907037e-05 |
| serial_timesteps   | 25472         |
| time_elapsed       | 1.6e+03       |
| total_timesteps    | 25472         |
| value_loss         | 0.016496744   |
--------------------------------------
-------------------------------------
| approxkl           | 5.350146e-07 |
| clipfrac           | 0.0          |
| ep_len_mean        | 2.7e+03      |
| ep_reward_mean     | 3.42         |
| explained_variance | -0.000318    |
| fps                | 23           |
| n_updates          | 200          |
| policy_entropy     | 2.3358746    |
| policy_loss        | -6.82883e-05 |
| serial_timesteps   | 2

--------------------------------------
| approxkl           | 8.7545493e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 3.7           |
| explained_variance | 0.0013        |
| fps                | 24            |
| n_updates          | 213           |
| policy_entropy     | 2.3362627     |
| policy_loss        | 1.2885779e-05 |
| serial_timesteps   | 27264         |
| time_elapsed       | 1.72e+03      |
| total_timesteps    | 27264         |
| value_loss         | 0.0033321767  |
--------------------------------------
---------------------------------------
| approxkl           | 9.0981314e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.7            |
| explained_variance | 0.000395       |
| fps                | 24             |
| n_updates          | 214            |
| policy_entropy     | 2.3372817      |
| policy_loss        | -0.00022215862 |
| serial_timest

--------------------------------------
| approxkl           | 9.1775644e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 3.7           |
| explained_variance | -0.000564     |
| fps                | 24            |
| n_updates          | 227           |
| policy_entropy     | 2.3341312     |
| policy_loss        | -1.823157e-05 |
| serial_timesteps   | 29056         |
| time_elapsed       | 1.82e+03      |
| total_timesteps    | 29056         |
| value_loss         | 0.0058726138  |
--------------------------------------
---------------------------------------
| approxkl           | 5.293576e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.7            |
| explained_variance | -0.00327       |
| fps                | 24             |
| n_updates          | 228            |
| policy_entropy     | 2.333689       |
| policy_loss        | -1.2885779e-05 |
| serial_timest

--------------------------------------
| approxkl           | 9.756e-07     |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 3.62          |
| explained_variance | -0.00142      |
| fps                | 24            |
| n_updates          | 241           |
| policy_entropy     | 2.3146615     |
| policy_loss        | 7.5045973e-06 |
| serial_timesteps   | 30848         |
| time_elapsed       | 1.93e+03      |
| total_timesteps    | 30848         |
| value_loss         | 0.003059108   |
--------------------------------------
---------------------------------------
| approxkl           | 5.895233e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 3.62           |
| explained_variance | 0.00261        |
| fps                | 24             |
| n_updates          | 242            |
| policy_entropy     | 2.316351       |
| policy_loss        | -0.00010298751 |
| serial_timest

---------------------------------------
| approxkl           | 9.223171e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 3.62           |
| explained_variance | -0.00133       |
| fps                | 24             |
| n_updates          | 255            |
| policy_entropy     | 2.305829       |
| policy_loss        | -0.00020482764 |
| serial_timesteps   | 32640          |
| time_elapsed       | 2.03e+03       |
| total_timesteps    | 32640          |
| value_loss         | 0.0027973228   |
---------------------------------------
--------------------------------------
| approxkl           | 8.298175e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.62          |
| explained_variance | -0.0208       |
| fps                | 24            |
| n_updates          | 256           |
| policy_entropy     | 2.3115644     |
| policy_loss        | -8.501485e-05 |
| serial_t

--------------------------------------
| approxkl           | 7.149084e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.62          |
| explained_variance | 0.0015        |
| fps                | 23            |
| n_updates          | 269           |
| policy_entropy     | 2.3307643     |
| policy_loss        | -4.036352e-06 |
| serial_timesteps   | 34432         |
| time_elapsed       | 2.14e+03      |
| total_timesteps    | 34432         |
| value_loss         | 0.005983786   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 3.641801e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.62          |
| explained_variance | 0.000182      |
| fps                | 11            |
| n_updates          | 270           |
| policy_entropy     | 2.3304172     |
| policy_loss        | -4.345551e-05 |
| serial_times

--------------------------------------
| approxkl           | 1.7634727e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.61          |
| explained_variance | -0.00427      |
| fps                | 24            |
| n_updates          | 283           |
| policy_entropy     | 2.3089604     |
| policy_loss        | 1.9439496e-05 |
| serial_timesteps   | 36224         |
| time_elapsed       | 2.25e+03      |
| total_timesteps    | 36224         |
| value_loss         | 0.001964339   |
--------------------------------------
--------------------------------------
| approxkl           | 6.18605e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.61          |
| explained_variance | -0.0032       |
| fps                | 24            |
| n_updates          | 284           |
| policy_entropy     | 2.3099751     |
| policy_loss        | -3.696233e-05 |
| serial_timesteps   | 36

---------------------------------------
| approxkl           | 1.71064e-07    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.61           |
| explained_variance | 0.000365       |
| fps                | 23             |
| n_updates          | 297            |
| policy_entropy     | 2.3196328      |
| policy_loss        | -2.0647421e-05 |
| serial_timesteps   | 38016          |
| time_elapsed       | 2.35e+03       |
| total_timesteps    | 38016          |
| value_loss         | 0.010647641    |
---------------------------------------
---------------------------------------
| approxkl           | 4.4309195e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.61           |
| explained_variance | 0.00129        |
| fps                | 24             |
| n_updates          | 298            |
| policy_entropy     | 2.3196669      |
| policy_loss        | -0.00015695114 |


---------------------------------------
| approxkl           | 3.920618e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.61           |
| explained_variance | -0.000285      |
| fps                | 24             |
| n_updates          | 311            |
| policy_entropy     | 2.3262715      |
| policy_loss        | -2.7583912e-05 |
| serial_timesteps   | 39808          |
| time_elapsed       | 2.46e+03       |
| total_timesteps    | 39808          |
| value_loss         | 0.007916663    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 5.2098846e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.61           |
| explained_variance | 0.00122        |
| fps                | 9              |
| n_updates          | 312            |
| policy_entropy     | 2.3267913      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 3.700107e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.42e+03       |
| ep_reward_mean     | 3.36           |
| explained_variance | 0.0067         |
| fps                | 24             |
| n_updates          | 325            |
| policy_entropy     | 2.3106523      |
| policy_loss        | -0.00031798705 |
| serial_timesteps   | 41600          |
| time_elapsed       | 2.57e+03       |
| total_timesteps    | 41600          |
| value_loss         | 0.00391603     |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 3.131618e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.42e+03       |
| ep_reward_mean     | 3.36           |
| explained_variance | -0.0054        |
| fps                | 9              |
| n_updates          | 326            |
| policy_entropy     | 2.309895       |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 8.469484e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.42e+03       |
| ep_reward_mean     | 3.36           |
| explained_variance | -0.000805      |
| fps                | 22             |
| n_updates          | 339            |
| policy_entropy     | 2.3182008      |
| policy_loss        | -0.00022207387 |
| serial_timesteps   | 43392          |
| time_elapsed       | 2.68e+03       |
| total_timesteps    | 43392          |
| value_loss         | 0.00857233     |
---------------------------------------
---------------------------------------
| approxkl           | 1.2083051e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.42e+03       |
| ep_reward_mean     | 3.36           |
| explained_variance | 0.000214       |
| fps                | 21             |
| n_updates          | 340            |
| policy_entropy     | 2.3172193      |
| policy_loss        | -0.00013548881 |


---------------------------------------
| approxkl           | 1.5792257e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.4e+03        |
| ep_reward_mean     | 3.4            |
| explained_variance | -0.00715       |
| fps                | 24             |
| n_updates          | 353            |
| policy_entropy     | 2.2944584      |
| policy_loss        | -0.00029960647 |
| serial_timesteps   | 45184          |
| time_elapsed       | 2.79e+03       |
| total_timesteps    | 45184          |
| value_loss         | 0.004808799    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 4.8203647e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.4e+03        |
| ep_reward_mean     | 3.4            |
| explained_variance | -0.00116       |
| fps                | 11             |
| n_updates          | 354            |
| policy_entropy     | 2.2933903      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 9.855785e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.34e+03      |
| ep_reward_mean     | 3.47          |
| explained_variance | 0.00156       |
| fps                | 22            |
| n_updates          | 367           |
| policy_entropy     | 2.254221      |
| policy_loss        | -0.0007502083 |
| serial_timesteps   | 46976         |
| time_elapsed       | 2.9e+03       |
| total_timesteps    | 46976         |
| value_loss         | 0.0073367786  |
--------------------------------------
--------------------------------------
| approxkl           | 5.9593367e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.34e+03      |
| ep_reward_mean     | 3.47          |
| explained_variance | 0.0113        |
| fps                | 22            |
| n_updates          | 368           |
| policy_entropy     | 2.2807493     |
| policy_loss        | 0.00017833337 |
| serial_timesteps   | 47

----------------------------------------
| approxkl           | 1.6232539e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.34e+03        |
| ep_reward_mean     | 3.47            |
| explained_variance | 0.00125         |
| fps                | 22              |
| n_updates          | 381             |
| policy_entropy     | 2.3120906       |
| policy_loss        | -0.000115029514 |
| serial_timesteps   | 48768           |
| time_elapsed       | 3.02e+03        |
| total_timesteps    | 48768           |
| value_loss         | 0.00915424      |
----------------------------------------
---------------------------------------
| approxkl           | 1.118912e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.34e+03       |
| ep_reward_mean     | 3.47           |
| explained_variance | -0.000302      |
| fps                | 21             |
| n_updates          | 382            |
| policy_entropy     | 2.3109221      |
| policy_loss        | -0

Round done
--------------------------------------
| approxkl           | 1.0211204e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 3.47          |
| explained_variance | -0.00692      |
| fps                | 10            |
| n_updates          | 395           |
| policy_entropy     | 2.2821126     |
| policy_loss        | 5.4836273e-06 |
| serial_timesteps   | 50560         |
| time_elapsed       | 3.13e+03      |
| total_timesteps    | 50560         |
| value_loss         | 0.007090887   |
--------------------------------------
--------------------------------------
| approxkl           | 3.4487923e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 3.47          |
| explained_variance | 0.000757      |
| fps                | 23            |
| n_updates          | 396           |
| policy_entropy     | 2.2823606     |
| policy_loss        | -7.53589e-05  |
| serial_times

---------------------------------------
| approxkl           | 4.978494e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 3.47           |
| explained_variance | -0.000662      |
| fps                | 22             |
| n_updates          | 409            |
| policy_entropy     | 2.284975       |
| policy_loss        | -0.00011959113 |
| serial_timesteps   | 52352          |
| time_elapsed       | 3.25e+03       |
| total_timesteps    | 52352          |
| value_loss         | 0.0050844513   |
---------------------------------------
---------------------------------------
| approxkl           | 7.077035e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 3.47           |
| explained_variance | -0.00193       |
| fps                | 23             |
| n_updates          | 410            |
| policy_entropy     | 2.2854304      |
| policy_loss        | -0.00016024988 |


---------------------------------------
| approxkl           | 7.7428945e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.3e+03        |
| ep_reward_mean     | 3.52           |
| explained_variance | -0.00492       |
| fps                | 23             |
| n_updates          | 423            |
| policy_entropy     | 2.2814448      |
| policy_loss        | -0.00011030026 |
| serial_timesteps   | 54144          |
| time_elapsed       | 3.36e+03       |
| total_timesteps    | 54144          |
| value_loss         | 0.006367731    |
---------------------------------------
---------------------------------------
| approxkl           | 5.451475e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.3e+03        |
| ep_reward_mean     | 3.52           |
| explained_variance | 0.00275        |
| fps                | 23             |
| n_updates          | 424            |
| policy_entropy     | 2.2824373      |
| policy_loss        | -4.4647604e-05 |


Round done
---------------------------------------
| approxkl           | 1.5043242e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.3e+03        |
| ep_reward_mean     | 3.52           |
| explained_variance | 0.000307       |
| fps                | 10             |
| n_updates          | 437            |
| policy_entropy     | 2.28578        |
| policy_loss        | -0.00010251999 |
| serial_timesteps   | 55936          |
| time_elapsed       | 3.48e+03       |
| total_timesteps    | 55936          |
| value_loss         | 0.015680922    |
---------------------------------------
---------------------------------------
| approxkl           | 1.5655971e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.3e+03        |
| ep_reward_mean     | 3.52           |
| explained_variance | -0.000508      |
| fps                | 23             |
| n_updates          | 438            |
| policy_entropy     | 2.2879877      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.684067e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.31e+03      |
| ep_reward_mean     | 3.54          |
| explained_variance | 0.00356       |
| fps                | 23            |
| n_updates          | 451           |
| policy_entropy     | 2.274862      |
| policy_loss        | -5.351752e-05 |
| serial_timesteps   | 57728         |
| time_elapsed       | 3.59e+03      |
| total_timesteps    | 57728         |
| value_loss         | 0.006106374   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 2.058818e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.31e+03      |
| ep_reward_mean     | 3.54          |
| explained_variance | -0.0022       |
| fps                | 11            |
| n_updates          | 452           |
| policy_entropy     | 2.2759366     |
| policy_loss        | -5.629845e-05 |
| serial_times

Stage done
---------------------------------------
| approxkl           | 2.0795176e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 3.51           |
| explained_variance | 0.00867        |
| fps                | 9              |
| n_updates          | 465            |
| policy_entropy     | 2.2740297      |
| policy_loss        | -1.7157523e-05 |
| serial_timesteps   | 59520          |
| time_elapsed       | 3.71e+03       |
| total_timesteps    | 59520          |
| value_loss         | 0.0037380636   |
---------------------------------------
--------------------------------------
| approxkl           | 2.227767e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 3.51          |
| explained_variance | 0.00585       |
| fps                | 23            |
| n_updates          | 466           |
| policy_entropy     | 2.2759824     |
| policy_loss        | -4.654005e-05 |

Round done
---------------------------------------
| approxkl           | 1.379218e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 3.51           |
| explained_variance | -0.00158       |
| fps                | 10             |
| n_updates          | 479            |
| policy_entropy     | 2.2904878      |
| policy_loss        | -0.00016156584 |
| serial_timesteps   | 61312          |
| time_elapsed       | 3.83e+03       |
| total_timesteps    | 61312          |
| value_loss         | 0.013108166    |
---------------------------------------
---------------------------------------
| approxkl           | 1.0806342e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 3.51           |
| explained_variance | -0.000438      |
| fps                | 22             |
| n_updates          | 480            |
| policy_entropy     | 2.288582       |
| policy_loss        | -1.459

---------------------------------------
| approxkl           | 1.2957222e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.65           |
| explained_variance | -0.0064        |
| fps                | 22             |
| n_updates          | 493            |
| policy_entropy     | 2.2495089      |
| policy_loss        | -0.00011992827 |
| serial_timesteps   | 63104          |
| time_elapsed       | 3.95e+03       |
| total_timesteps    | 63104          |
| value_loss         | 0.0037326366   |
---------------------------------------
Round done
----------------------------------------
| approxkl           | 1.9153233e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.14e+03        |
| ep_reward_mean     | 3.65            |
| explained_variance | -0.00947        |
| fps                | 10              |
| n_updates          | 494             |
| policy_entropy     | 2.2629035       |
| policy_loss       

Stage done
--------------------------------------
| approxkl           | 1.055468e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 3.65          |
| explained_variance | -0.0013       |
| fps                | 8             |
| n_updates          | 507           |
| policy_entropy     | 2.2817492     |
| policy_loss        | -0.0002111774 |
| serial_timesteps   | 64896         |
| time_elapsed       | 4.06e+03      |
| total_timesteps    | 64896         |
| value_loss         | 0.002820666   |
--------------------------------------
---------------------------------------
| approxkl           | 2.1424762e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.65           |
| explained_variance | -0.00194       |
| fps                | 22             |
| n_updates          | 508            |
| policy_entropy     | 2.2787142      |
| policy_loss        | -0.00017388165 |
| se

---------------------------------------
| approxkl           | 1.4467792e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 3.79           |
| explained_variance | -0.00191       |
| fps                | 23             |
| n_updates          | 521            |
| policy_entropy     | 2.2573905      |
| policy_loss        | -0.00014324859 |
| serial_timesteps   | 66688          |
| time_elapsed       | 4.18e+03       |
| total_timesteps    | 66688          |
| value_loss         | 0.0074876365   |
---------------------------------------
Round done
-------------------------------------
| approxkl           | 2.177304e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.12e+03     |
| ep_reward_mean     | 3.79         |
| explained_variance | 0.00775      |
| fps                | 10           |
| n_updates          | 522          |
| policy_entropy     | 2.2558398    |
| policy_loss        | -7.89389e-05 |
| serial_

Round done
---------------------------------------
| approxkl           | 9.599767e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 3.79           |
| explained_variance | 5.7e-05        |
| fps                | 10             |
| n_updates          | 535            |
| policy_entropy     | 2.236853       |
| policy_loss        | -0.00026392937 |
| serial_timesteps   | 68480          |
| time_elapsed       | 4.29e+03       |
| total_timesteps    | 68480          |
| value_loss         | 0.0094716465   |
---------------------------------------
---------------------------------------
| approxkl           | 1.2726043e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 3.79           |
| explained_variance | 0.000535       |
| fps                | 23             |
| n_updates          | 536            |
| policy_entropy     | 2.236293       |
| policy_loss        | -3.104

---------------------------------------
| approxkl           | 7.0023657e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | -0.000472      |
| fps                | 22             |
| n_updates          | 549            |
| policy_entropy     | 2.2023096      |
| policy_loss        | -8.1855804e-05 |
| serial_timesteps   | 70272          |
| time_elapsed       | 4.41e+03       |
| total_timesteps    | 70272          |
| value_loss         | 0.014251992    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 7.7844066e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | -0.00583       |
| fps                | 9              |
| n_updates          | 550            |
| policy_entropy     | 2.204782       |
| policy_loss        | -9.405

---------------------------------------
| approxkl           | 2.9407884e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.12e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | -0.000177      |
| fps                | 23             |
| n_updates          | 563            |
| policy_entropy     | 2.1956449      |
| policy_loss        | -0.00010892376 |
| serial_timesteps   | 72064          |
| time_elapsed       | 4.52e+03       |
| total_timesteps    | 72064          |
| value_loss         | 0.007946764    |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 3.065309e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.69           |
| explained_variance | -0.0599        |
| fps                | 8              |
| n_updates          | 564            |
| polic

Stage done
---------------------------------------
| approxkl           | 2.0573111e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.69           |
| explained_variance | -0.00177       |
| fps                | 9              |
| n_updates          | 577            |
| policy_entropy     | 2.1573877      |
| policy_loss        | -0.00011895783 |
| serial_timesteps   | 73856          |
| time_elapsed       | 4.63e+03       |
| total_timesteps    | 73856          |
| value_loss         | 0.008504234    |
---------------------------------------
--------------------------------------
| approxkl           | 1.3376562e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.14e+03      |
| ep_reward_mean     | 3.69          |
| explained_variance | 0.00226       |
| fps                | 22            |
| n_updates          | 578           |
| policy_entropy     | 2.1594086     |
| policy_loss        | -9.147916e-05 |

Round done
---------------------------------------
| approxkl           | 1.0305788e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.69           |
| explained_variance | 0.00109        |
| fps                | 10             |
| n_updates          | 591            |
| policy_entropy     | 2.184713       |
| policy_loss        | -0.00015264098 |
| serial_timesteps   | 75648          |
| time_elapsed       | 4.74e+03       |
| total_timesteps    | 75648          |
| value_loss         | 0.01046082     |
---------------------------------------
---------------------------------------
| approxkl           | 6.16402e-06    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.69           |
| explained_variance | -0.00128       |
| fps                | 22             |
| n_updates          | 592            |
| policy_entropy     | 2.1810734      |
| policy_loss        | -0.000

Round done
---------------------------------------
| approxkl           | 2.9453877e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.7            |
| explained_variance | 0.0102         |
| fps                | 10             |
| n_updates          | 605            |
| policy_entropy     | 2.1537268      |
| policy_loss        | -0.00023563765 |
| serial_timesteps   | 77440          |
| time_elapsed       | 4.86e+03       |
| total_timesteps    | 77440          |
| value_loss         | 0.0040603844   |
---------------------------------------
---------------------------------------
| approxkl           | 2.1586418e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.7            |
| explained_variance | 0.0201         |
| fps                | 22             |
| n_updates          | 606            |
| policy_entropy     | 2.164883       |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 5.0306613e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.7            |
| explained_variance | 0.00315        |
| fps                | 22             |
| n_updates          | 619            |
| policy_entropy     | 2.1894965      |
| policy_loss        | -0.00014188746 |
| serial_timesteps   | 79232          |
| time_elapsed       | 4.98e+03       |
| total_timesteps    | 79232          |
| value_loss         | 0.0073411963   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 1.9778129e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.7            |
| explained_variance | -0.00081       |
| fps                | 9              |
| n_updates          | 620            |
| policy_entropy     | 2.1898394      |
| policy_loss        | -0.000

Stage done
---------------------------------------
| approxkl           | 9.511256e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.63           |
| explained_variance | -0.00148       |
| fps                | 9              |
| n_updates          | 633            |
| policy_entropy     | 2.1522257      |
| policy_loss        | -0.00010915287 |
| serial_timesteps   | 81024          |
| time_elapsed       | 5.09e+03       |
| total_timesteps    | 81024          |
| value_loss         | 0.005179546    |
---------------------------------------
--------------------------------------
| approxkl           | 1.4545672e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 3.63          |
| explained_variance | -5.07e-05     |
| fps                | 23            |
| n_updates          | 634           |
| policy_entropy     | 2.1576357     |
| policy_loss        | -2.421951e-05 |

Round done
---------------------------------------
| approxkl           | 9.935825e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.63           |
| explained_variance | -0.000276      |
| fps                | 11             |
| n_updates          | 647            |
| policy_entropy     | 2.2274017      |
| policy_loss        | -0.00019150972 |
| serial_timesteps   | 82816          |
| time_elapsed       | 5.21e+03       |
| total_timesteps    | 82816          |
| value_loss         | 0.0045852866   |
---------------------------------------
--------------------------------------
| approxkl           | 7.09501e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 3.63          |
| explained_variance | 0.00242       |
| fps                | 23            |
| n_updates          | 648           |
| policy_entropy     | 2.2301326     |
| policy_loss        | -4.292652e-05 |

----------------------------------------
| approxkl           | 8.278653e-07    |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.22e+03        |
| ep_reward_mean     | 3.69            |
| explained_variance | 0.00215         |
| fps                | 23              |
| n_updates          | 661             |
| policy_entropy     | 2.2297297       |
| policy_loss        | -0.000116368756 |
| serial_timesteps   | 84608           |
| time_elapsed       | 5.33e+03        |
| total_timesteps    | 84608           |
| value_loss         | 0.0063363467    |
----------------------------------------
--------------------------------------
| approxkl           | 1.0698266e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.69          |
| explained_variance | -0.0113       |
| fps                | 23            |
| n_updates          | 662           |
| policy_entropy     | 2.2358692     |
| policy_loss        | -9.685755e-

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 3.07356e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 3.73          |
| explained_variance | 0.309         |
| fps                | 8             |
| n_updates          | 675           |
| policy_entropy     | 2.1571956     |
| policy_loss        | -0.0003486164 |
| serial_timesteps   | 86400         |
| time_elapsed       | 5.44e+03      |
| total_timesteps    | 86400         |
| value_loss         | 0.007954479   |
--------------------------------------
--------------------------------------
| approxkl           | 7.4502583e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 3.73          |
| explained_variance | 0.0446        |
| fps                | 23            |
| n_updates          | 676           |
| policy_entropy     | 2.13123

--------------------------------------
| approxkl           | 1.0910601e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 3.73          |
| explained_variance | -0.00351      |
| fps                | 22            |
| n_updates          | 689           |
| policy_entropy     | 2.2720091     |
| policy_loss        | -7.260591e-06 |
| serial_timesteps   | 88192         |
| time_elapsed       | 5.56e+03      |
| total_timesteps    | 88192         |
| value_loss         | 0.0038634904  |
--------------------------------------
---------------------------------------
| approxkl           | 2.0274751e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.73           |
| explained_variance | -0.000516      |
| fps                | 22             |
| n_updates          | 690            |
| policy_entropy     | 2.2752595      |
| policy_loss        | -0.00021135435 |
| serial_timest

---------------------------------------
| approxkl           | 4.469498e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.73           |
| explained_variance | 0.0315         |
| fps                | 23             |
| n_updates          | 703            |
| policy_entropy     | 2.2647135      |
| policy_loss        | -0.00016815774 |
| serial_timesteps   | 89984          |
| time_elapsed       | 5.68e+03       |
| total_timesteps    | 89984          |
| value_loss         | 0.0022823547   |
---------------------------------------
--------------------------------------
| approxkl           | 7.557995e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 3.73          |
| explained_variance | -0.0241       |
| fps                | 22            |
| n_updates          | 704           |
| policy_entropy     | 2.2718408     |
| policy_loss        | -8.483231e-05 |
| serial_t

Stage done
---------------------------------------
| approxkl           | 1.271511e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | -0.0127        |
| fps                | 9              |
| n_updates          | 717            |
| policy_entropy     | 2.2687554      |
| policy_loss        | -0.00028692652 |
| serial_timesteps   | 91776          |
| time_elapsed       | 5.79e+03       |
| total_timesteps    | 91776          |
| value_loss         | 0.0068136053   |
---------------------------------------
---------------------------------------
| approxkl           | 1.7822206e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | -0.00937       |
| fps                | 22             |
| n_updates          | 718            |
| policy_entropy     | 2.2708094      |
| policy_loss        | -0.000

Round done
---------------------------------------
| approxkl           | 1.1072952e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | 0.000244       |
| fps                | 10             |
| n_updates          | 731            |
| policy_entropy     | 2.2978578      |
| policy_loss        | -0.00017503276 |
| serial_timesteps   | 93568          |
| time_elapsed       | 5.91e+03       |
| total_timesteps    | 93568          |
| value_loss         | 0.0067372327   |
---------------------------------------
---------------------------------------
| approxkl           | 1.1327785e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.14e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | 0.00111        |
| fps                | 21             |
| n_updates          | 732            |
| policy_entropy     | 2.2999         |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 3.6464866e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | -0.00564       |
| fps                | 22             |
| n_updates          | 745            |
| policy_entropy     | 2.290811       |
| policy_loss        | -1.3623387e-05 |
| serial_timesteps   | 95360          |
| time_elapsed       | 6.03e+03       |
| total_timesteps    | 95360          |
| value_loss         | 0.002797651    |
---------------------------------------
---------------------------------------
| approxkl           | 3.005249e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | -0.00217       |
| fps                | 22             |
| n_updates          | 746            |
| policy_entropy     | 2.2947705      |
| policy_loss        | -1.8095598e-05 |


---------------------------------------
| approxkl           | 3.9120658e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 3.66           |
| explained_variance | 0.00278        |
| fps                | 23             |
| n_updates          | 759            |
| policy_entropy     | 2.3212516      |
| policy_loss        | -7.2589144e-05 |
| serial_timesteps   | 97152          |
| time_elapsed       | 6.14e+03       |
| total_timesteps    | 97152          |
| value_loss         | 0.0028519686   |
---------------------------------------
--------------------------------------
| approxkl           | 3.3595472e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.66          |
| explained_variance | 0.000158      |
| fps                | 23            |
| n_updates          | 760           |
| policy_entropy     | 2.322604      |
| policy_loss        | 2.6635826e-06 |
| serial_t

--------------------------------------
| approxkl           | 5.2957694e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.66          |
| explained_variance | 0.0005        |
| fps                | 24            |
| n_updates          | 773           |
| policy_entropy     | 2.32726       |
| policy_loss        | 1.771003e-05  |
| serial_timesteps   | 98944         |
| time_elapsed       | 6.25e+03      |
| total_timesteps    | 98944         |
| value_loss         | 0.0076586884  |
--------------------------------------
--------------------------------------
| approxkl           | 5.489045e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.66          |
| explained_variance | -0.000703     |
| fps                | 23            |
| n_updates          | 774           |
| policy_entropy     | 2.328567      |
| policy_loss        | -6.783381e-05 |
| serial_timesteps   | 99

---------------------------------------
| approxkl           | 1.6220057e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 3.71           |
| explained_variance | -0.0141        |
| fps                | 24             |
| n_updates          | 787            |
| policy_entropy     | 2.2964034      |
| policy_loss        | -3.5353005e-06 |
| serial_timesteps   | 100736         |
| time_elapsed       | 6.36e+03       |
| total_timesteps    | 100736         |
| value_loss         | 0.002339023    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 4.4797926e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.71          |
| explained_variance | 0.0102        |
| fps                | 9             |
| n_updates          | 788           |
| policy_entropy     | 2.2991698     |
| policy_loss        | 1.5297905e-05 |

---------------------------------------
| approxkl           | 2.8227505e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.72           |
| explained_variance | -0.0141        |
| fps                | 23             |
| n_updates          | 801            |
| policy_entropy     | 2.2371323      |
| policy_loss        | -0.00013490207 |
| serial_timesteps   | 102528         |
| time_elapsed       | 6.47e+03       |
| total_timesteps    | 102528         |
| value_loss         | 0.006648401    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 2.658766e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.72           |
| explained_variance | -0.0174        |
| fps                | 11             |
| n_updates          | 802            |
| policy_entropy     | 2.2497272      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 1.9880503e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.72           |
| explained_variance | 0.00457        |
| fps                | 24             |
| n_updates          | 815            |
| policy_entropy     | 2.2720928      |
| policy_loss        | -2.9778108e-05 |
| serial_timesteps   | 104320         |
| time_elapsed       | 6.58e+03       |
| total_timesteps    | 104320         |
| value_loss         | 0.0045653824   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.3822382e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.72           |
| explained_variance | 0.00259        |
| fps                | 9              |
| n_updates          | 816            |
| policy_entropy     | 2.2733319      |
| policy_loss        | -5.082

--------------------------------------
| approxkl           | 1.0344344e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 3.83          |
| explained_variance | 0.00498       |
| fps                | 23            |
| n_updates          | 829           |
| policy_entropy     | 2.2536812     |
| policy_loss        | -5.891174e-05 |
| serial_timesteps   | 106112        |
| time_elapsed       | 6.69e+03      |
| total_timesteps    | 106112        |
| value_loss         | 0.0042262333  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 2.1375695e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.83           |
| explained_variance | 0.0132         |
| fps                | 11             |
| n_updates          | 830            |
| policy_entropy     | 2.257078       |
| policy_loss        | -0.00023285672 |
| se

Round done
---------------------------------------
| approxkl           | 1.0380091e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.83           |
| explained_variance | 0.000172       |
| fps                | 10             |
| n_updates          | 843            |
| policy_entropy     | 2.2635121      |
| policy_loss        | -0.00012618862 |
| serial_timesteps   | 107904         |
| time_elapsed       | 6.8e+03        |
| total_timesteps    | 107904         |
| value_loss         | 0.006000798    |
---------------------------------------
---------------------------------------
| approxkl           | 1.8174306e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.83           |
| explained_variance | -0.00154       |
| fps                | 23             |
| n_updates          | 844            |
| policy_entropy     | 2.263784       |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 1.9483875e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.94           |
| explained_variance | -0.0407        |
| fps                | 23             |
| n_updates          | 857            |
| policy_entropy     | 2.1941872      |
| policy_loss        | -3.3635646e-05 |
| serial_timesteps   | 109696         |
| time_elapsed       | 6.92e+03       |
| total_timesteps    | 109696         |
| value_loss         | 0.009256015    |
---------------------------------------
--------------------------------------
| approxkl           | 3.559553e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.94          |
| explained_variance | 0.00218       |
| fps                | 24            |
| n_updates          | 858           |
| policy_entropy     | 2.2126384     |
| policy_loss        | -0.0003841929 |
| serial_t

Round done
--------------------------------------
| approxkl           | 2.7212711e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.94          |
| explained_variance | 0.00551       |
| fps                | 11            |
| n_updates          | 871           |
| policy_entropy     | 2.2766743     |
| policy_loss        | 7.274747e-05  |
| serial_timesteps   | 111488        |
| time_elapsed       | 7.02e+03      |
| total_timesteps    | 111488        |
| value_loss         | 0.01057082    |
--------------------------------------
--------------------------------------
| approxkl           | 8.656938e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.94          |
| explained_variance | -0.00201      |
| fps                | 24            |
| n_updates          | 872           |
| policy_entropy     | 2.2802677     |
| policy_loss        | 2.8306618e-05 |
| serial_times

---------------------------------------
| approxkl           | 1.7485563e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.00422       |
| fps                | 23             |
| n_updates          | 885            |
| policy_entropy     | 2.2217066      |
| policy_loss        | -0.00012722239 |
| serial_timesteps   | 113280         |
| time_elapsed       | 7.13e+03       |
| total_timesteps    | 113280         |
| value_loss         | 0.008328706    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.7534502e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | 0.00316        |
| fps                | 11             |
| n_updates          | 886            |
| policy_entropy     | 2.234361       |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 5.550741e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.97          |
| explained_variance | -0.00227      |
| fps                | 23            |
| n_updates          | 899           |
| policy_entropy     | 2.2728806     |
| policy_loss        | -3.299862e-05 |
| serial_timesteps   | 115072        |
| time_elapsed       | 7.25e+03      |
| total_timesteps    | 115072        |
| value_loss         | 0.003425192   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.0635711e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.97          |
| explained_variance | 0.00176       |
| fps                | 11            |
| n_updates          | 900           |
| policy_entropy     | 2.2748718     |
| policy_loss        | -0.0001831539 |
| serial_times

---------------------------------------
| approxkl           | 1.092113e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.000222      |
| fps                | 23             |
| n_updates          | 913            |
| policy_entropy     | 2.2805498      |
| policy_loss        | -0.00013803132 |
| serial_timesteps   | 116864         |
| time_elapsed       | 7.36e+03       |
| total_timesteps    | 116864         |
| value_loss         | 0.007141154    |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 6.4208193e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | 0.327          |
| fps                | 8              |
| n_updates          | 914            |
| polic

--------------------------------------
| approxkl           | 6.930554e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | 0.00421       |
| fps                | 24            |
| n_updates          | 927           |
| policy_entropy     | 2.2877195     |
| policy_loss        | -7.734448e-05 |
| serial_timesteps   | 118656        |
| time_elapsed       | 7.47e+03      |
| total_timesteps    | 118656        |
| value_loss         | 0.0045504533  |
--------------------------------------
--------------------------------------
| approxkl           | 2.0756889e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | 0.00207       |
| fps                | 24            |
| n_updates          | 928           |
| policy_entropy     | 2.2900653     |
| policy_loss        | -6.187707e-06 |
| serial_timesteps   | 11

---------------------------------------
| approxkl           | 1.2934086e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4              |
| explained_variance | 0.0243         |
| fps                | 23             |
| n_updates          | 941            |
| policy_entropy     | 2.2526655      |
| policy_loss        | -0.00013350695 |
| serial_timesteps   | 120448         |
| time_elapsed       | 7.58e+03       |
| total_timesteps    | 120448         |
| value_loss         | 0.009404155    |
---------------------------------------
--------------------------------------
| approxkl           | 1.4037257e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4             |
| explained_variance | -0.00376      |
| fps                | 23            |
| n_updates          | 942           |
| policy_entropy     | 2.258613      |
| policy_loss        | -8.670241e-05 |
| serial_t

Round done
---------------------------------------
| approxkl           | 5.524952e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4              |
| explained_variance | -0.000559      |
| fps                | 10             |
| n_updates          | 955            |
| policy_entropy     | 2.2499187      |
| policy_loss        | -0.00014178455 |
| serial_timesteps   | 122240         |
| time_elapsed       | 7.7e+03        |
| total_timesteps    | 122240         |
| value_loss         | 0.005949122    |
---------------------------------------
---------------------------------------
| approxkl           | 4.1064045e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4              |
| explained_variance | 0.000437       |
| fps                | 23             |
| n_updates          | 956            |
| policy_entropy     | 2.2464607      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 9.47145e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.99          |
| explained_variance | -0.00501      |
| fps                | 22            |
| n_updates          | 969           |
| policy_entropy     | 2.2373996     |
| policy_loss        | -5.979091e-05 |
| serial_timesteps   | 124032        |
| time_elapsed       | 7.81e+03      |
| total_timesteps    | 124032        |
| value_loss         | 0.00578387    |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 5.9135965e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 3.99           |
| explained_variance | 0.00285        |
| fps                | 10             |
| n_updates          | 970            |
| policy_entropy     | 2.2388334      |
| policy_loss        | -7.8814104e-05 |
| se

---------------------------------------
| approxkl           | 7.5214774e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.93           |
| explained_variance | 0.00534        |
| fps                | 23             |
| n_updates          | 983            |
| policy_entropy     | 2.2458816      |
| policy_loss        | -0.00013940409 |
| serial_timesteps   | 125824         |
| time_elapsed       | 7.93e+03       |
| total_timesteps    | 125824         |
| value_loss         | 0.0020070015   |
---------------------------------------
---------------------------------------
| approxkl           | 1.0683887e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.93           |
| explained_variance | -0.00415       |
| fps                | 22             |
| n_updates          | 984            |
| policy_entropy     | 2.2487397      |
| policy_loss        | -0.00022199005 |


--------------------------------------
| approxkl           | 4.7780395e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 3.96          |
| explained_variance | -0.0351       |
| fps                | 23            |
| n_updates          | 997           |
| policy_entropy     | 2.1288342     |
| policy_loss        | -5.091913e-05 |
| serial_timesteps   | 127616        |
| time_elapsed       | 8.05e+03      |
| total_timesteps    | 127616        |
| value_loss         | 0.0049915435  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 3.8654666e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.96           |
| explained_variance | 0.000492       |
| fps                | 10             |
| n_updates          | 998            |
| policy_entropy     | 2.2241366      |
| policy_loss        | -0.00027407473 |
| se

---------------------------------------
| approxkl           | 1.4992269e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.96           |
| explained_variance | 0.00337        |
| fps                | 24             |
| n_updates          | 1011           |
| policy_entropy     | 2.299689       |
| policy_loss        | -0.00017222296 |
| serial_timesteps   | 129408         |
| time_elapsed       | 8.16e+03       |
| total_timesteps    | 129408         |
| value_loss         | 0.0025955352   |
---------------------------------------
---------------------------------------
| approxkl           | 2.1774695e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.96           |
| explained_variance | -0.00157       |
| fps                | 23             |
| n_updates          | 1012           |
| policy_entropy     | 2.2981515      |
| policy_loss        | -0.00032269955 |


Round done
--------------------------------------
| approxkl           | 4.4725812e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.02          |
| explained_variance | 0.0297        |
| fps                | 10            |
| n_updates          | 1025          |
| policy_entropy     | 2.242075      |
| policy_loss        | -8.816272e-05 |
| serial_timesteps   | 131200        |
| time_elapsed       | 8.27e+03      |
| total_timesteps    | 131200        |
| value_loss         | 0.0058327382  |
--------------------------------------
--------------------------------------
| approxkl           | 1.1907727e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.02          |
| explained_variance | 0.0106        |
| fps                | 24            |
| n_updates          | 1026          |
| policy_entropy     | 2.2511902     |
| policy_loss        | -0.0002198387 |
| serial_times

---------------------------------------
| approxkl           | 5.522534e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | 0.00238        |
| fps                | 24             |
| n_updates          | 1039           |
| policy_entropy     | 2.2685156      |
| policy_loss        | -0.00016751699 |
| serial_timesteps   | 132992         |
| time_elapsed       | 8.38e+03       |
| total_timesteps    | 132992         |
| value_loss         | 0.010015855    |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 5.396639e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | 0.0014         |
| fps                | 10             |
| n_updates          | 1040           |
| policy_entropy     | 2.2680063      |
| policy_loss        | -5.947

--------------------------------------
| approxkl           | 2.2595382e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 3.98          |
| explained_variance | 0.0061        |
| fps                | 24            |
| n_updates          | 1053          |
| policy_entropy     | 2.2521477     |
| policy_loss        | 1.9241124e-06 |
| serial_timesteps   | 134784        |
| time_elapsed       | 8.5e+03       |
| total_timesteps    | 134784        |
| value_loss         | 0.00914802    |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 3.8567136e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 3.98           |
| explained_variance | 0.00525        |
| fps                | 11             |
| n_updates          | 1054           |
| policy_entropy     | 2.2571368      |
| policy_loss        | -0.00043913163 |
| se

---------------------------------------
| approxkl           | 3.8093353e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 3.96           |
| explained_variance | -0.00588       |
| fps                | 24             |
| n_updates          | 1067           |
| policy_entropy     | 2.274967       |
| policy_loss        | -0.00023902114 |
| serial_timesteps   | 136576         |
| time_elapsed       | 8.61e+03       |
| total_timesteps    | 136576         |
| value_loss         | 0.004428083    |
---------------------------------------
--------------------------------------
| approxkl           | 2.0416294e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.96          |
| explained_variance | 0.00593       |
| fps                | 24            |
| n_updates          | 1068          |
| policy_entropy     | 2.2805996     |
| policy_loss        | -4.348159e-05 |
| serial_t

--------------------------------------
| approxkl           | 1.0773828e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.96          |
| explained_variance | 0.000845      |
| fps                | 24            |
| n_updates          | 1081          |
| policy_entropy     | 2.27528       |
| policy_loss        | 8.637458e-05  |
| serial_timesteps   | 138368        |
| time_elapsed       | 8.72e+03      |
| total_timesteps    | 138368        |
| value_loss         | 0.0046440293  |
--------------------------------------
---------------------------------------
| approxkl           | 1.2902943e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 3.96           |
| explained_variance | -0.000735      |
| fps                | 24             |
| n_updates          | 1082           |
| policy_entropy     | 2.2773905      |
| policy_loss        | -0.00023599714 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 1.0759967e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | 0.00532        |
| fps                | 11             |
| n_updates          | 1095           |
| policy_entropy     | 2.2671654      |
| policy_loss        | -0.00013716705 |
| serial_timesteps   | 140160         |
| time_elapsed       | 8.82e+03       |
| total_timesteps    | 140160         |
| value_loss         | 0.002453698    |
---------------------------------------
---------------------------------------
| approxkl           | 1.0423871e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | 0.00145        |
| fps                | 24             |
| n_updates          | 1096           |
| policy_entropy     | 2.2702143      |
| policy_loss        | -8.200

Round done
--------------------------------------
| approxkl           | 7.9842414e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.96          |
| explained_variance | 0.0116        |
| fps                | 11            |
| n_updates          | 1109          |
| policy_entropy     | 2.1942248     |
| policy_loss        | -9.24319e-05  |
| serial_timesteps   | 141952        |
| time_elapsed       | 8.93e+03      |
| total_timesteps    | 141952        |
| value_loss         | 0.0033903162  |
--------------------------------------
--------------------------------------
| approxkl           | 6.184477e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.96          |
| explained_variance | -0.0185       |
| fps                | 24            |
| n_updates          | 1110          |
| policy_entropy     | 2.221808      |
| policy_loss        | -8.568168e-05 |
| serial_times

---------------------------------------
| approxkl           | 3.846934e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.15e+03       |
| ep_reward_mean     | 3.96           |
| explained_variance | -0.00304       |
| fps                | 24             |
| n_updates          | 1123           |
| policy_entropy     | 2.2707672      |
| policy_loss        | -0.00011723116 |
| serial_timesteps   | 143744         |
| time_elapsed       | 9.04e+03       |
| total_timesteps    | 143744         |
| value_loss         | 0.005092847    |
---------------------------------------
-------------------------------------
| approxkl           | 1.965879e-06 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.15e+03     |
| ep_reward_mean     | 3.96         |
| explained_variance | -0.00114     |
| fps                | 23           |
| n_updates          | 1124         |
| policy_entropy     | 2.268559     |
| policy_loss        | -5.40521e-05 |
| serial_timesteps  

Round done
--------------------------------------
| approxkl           | 1.517526e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.96          |
| explained_variance | -0.00103      |
| fps                | 11            |
| n_updates          | 1137          |
| policy_entropy     | 2.2577262     |
| policy_loss        | -0.0001929691 |
| serial_timesteps   | 145536        |
| time_elapsed       | 9.15e+03      |
| total_timesteps    | 145536        |
| value_loss         | 0.0051909843  |
--------------------------------------
--------------------------------------
| approxkl           | 1.479284e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.15e+03      |
| ep_reward_mean     | 3.96          |
| explained_variance | 0.00138       |
| fps                | 24            |
| n_updates          | 1138          |
| policy_entropy     | 2.2564037     |
| policy_loss        | -6.305054e-05 |
| serial_times

-------------------------------------
| approxkl           | 5.67847e-07  |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.18e+03     |
| ep_reward_mean     | 4.04         |
| explained_variance | 0.00356      |
| fps                | 23           |
| n_updates          | 1151         |
| policy_entropy     | 2.2644603    |
| policy_loss        | 5.696155e-05 |
| serial_timesteps   | 147328       |
| time_elapsed       | 9.26e+03     |
| total_timesteps    | 147328       |
| value_loss         | 0.009683327  |
-------------------------------------
Stage done
---------------------------------------
| approxkl           | 2.5047882e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | 0.0099         |
| fps                | 9              |
| n_updates          | 1152           |
| policy_entropy     | 2.2674263      |
| policy_loss        | -0.00013682246 |
| serial_timesteps 

---------------------------------------
| approxkl           | 1.9331171e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | -0.0175        |
| fps                | 23             |
| n_updates          | 1165           |
| policy_entropy     | 2.2655363      |
| policy_loss        | -0.00020711124 |
| serial_timesteps   | 149120         |
| time_elapsed       | 9.38e+03       |
| total_timesteps    | 149120         |
| value_loss         | 0.0023356501   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 1.7929738e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | -0.00494       |
| fps                | 11             |
| n_updates          | 1166           |
| policy_entropy     | 2.268155       |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 5.006876e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | -0.000351     |
| fps                | 24            |
| n_updates          | 1179          |
| policy_entropy     | 2.2774916     |
| policy_loss        | -9.102747e-05 |
| serial_timesteps   | 150912        |
| time_elapsed       | 9.48e+03      |
| total_timesteps    | 150912        |
| value_loss         | 0.00721448    |
--------------------------------------
---------------------------------------
| approxkl           | 9.17881e-07    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | 0.00195        |
| fps                | 24             |
| n_updates          | 1180           |
| policy_entropy     | 2.277422       |
| policy_loss        | -0.00028402032 |
| serial_timest

Stage done
-------------------------------------
| approxkl           | 7.5476e-07   |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.16e+03     |
| ep_reward_mean     | 4.01         |
| explained_variance | 0.0137       |
| fps                | 9            |
| n_updates          | 1193         |
| policy_entropy     | 2.2495034    |
| policy_loss        | 7.137656e-06 |
| serial_timesteps   | 152704       |
| time_elapsed       | 9.59e+03     |
| total_timesteps    | 152704       |
| value_loss         | 0.0052831396 |
-------------------------------------
--------------------------------------
| approxkl           | 7.897278e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | -0.00854      |
| fps                | 23            |
| n_updates          | 1194          |
| policy_entropy     | 2.254601      |
| policy_loss        | -8.343719e-05 |
| serial_timesteps   | 152832

--------------------------------------
| approxkl           | 3.103542e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | 0.00631       |
| fps                | 24            |
| n_updates          | 1207          |
| policy_entropy     | 2.2840085     |
| policy_loss        | -6.883778e-05 |
| serial_timesteps   | 154496        |
| time_elapsed       | 9.7e+03       |
| total_timesteps    | 154496        |
| value_loss         | 0.0028732547  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 1.0672634e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.16e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | -0.000883     |
| fps                | 11            |
| n_updates          | 1208          |
| policy_entropy     | 2.284616      |
| policy_loss        | -0.0001347661 |
| serial_times

---------------------------------------
| approxkl           | 3.633379e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.16e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | -0.00042       |
| fps                | 24             |
| n_updates          | 1221           |
| policy_entropy     | 2.296382       |
| policy_loss        | 0.000103862956 |
| serial_timesteps   | 156288         |
| time_elapsed       | 9.81e+03       |
| total_timesteps    | 156288         |
| value_loss         | 0.005132728    |
---------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 7.4737386e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | 0.304         |
| fps                | 8             |
| n_updates          | 1222          |
| policy_entrop

---------------------------------------
| approxkl           | 8.357033e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | -0.0012        |
| fps                | 24             |
| n_updates          | 1235           |
| policy_entropy     | 2.2927833      |
| policy_loss        | 1.52513385e-05 |
| serial_timesteps   | 158080         |
| time_elapsed       | 9.92e+03       |
| total_timesteps    | 158080         |
| value_loss         | 0.012864528    |
---------------------------------------
--------------------------------------
| approxkl           | 2.2198617e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | 0.000781      |
| fps                | 23            |
| n_updates          | 1236          |
| policy_entropy     | 2.2943692     |
| policy_loss        | -9.512901e-05 |
| serial_t

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
---------------------------------------
| approxkl           | 1.5688502e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.0224         |
| fps                | 8              |
| n_updates          | 1249           |
| policy_entropy     | 2.2821238      |
| policy_loss        | -0.00014176592 |
| serial_timesteps   | 159872         |
| time_elapsed       | 1e+04          |
| total_timesteps    | 159872         |
| value_loss         | 0.009136241    |
---------------------------------------
--------------------------------------
| approxkl           | 1.3376215e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 4.08          |
| explained_variance | 0.107         |
| fps                | 24            |
| n_updates          | 1250          |
| policy_entrop

---------------------------------------
| approxkl           | 8.9538435e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.08           |
| explained_variance | -0.00106       |
| fps                | 24             |
| n_updates          | 1263           |
| policy_entropy     | 2.295065       |
| policy_loss        | -0.00010945322 |
| serial_timesteps   | 161664         |
| time_elapsed       | 1.01e+04       |
| total_timesteps    | 161664         |
| value_loss         | 0.009221452    |
---------------------------------------
---------------------------------------
| approxkl           | 6.2884476e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 4.08           |
| explained_variance | -0.00134       |
| fps                | 24             |
| n_updates          | 1264           |
| policy_entropy     | 2.2957616      |
| policy_loss        | -1.8267892e-05 |


Round done
--------------------------------------
| approxkl           | 1.9873478e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.03          |
| explained_variance | 0.00345       |
| fps                | 10            |
| n_updates          | 1277          |
| policy_entropy     | 2.2466857     |
| policy_loss        | 0.00021341443 |
| serial_timesteps   | 163456        |
| time_elapsed       | 1.02e+04      |
| total_timesteps    | 163456        |
| value_loss         | 0.0054668444  |
--------------------------------------
---------------------------------------
| approxkl           | 3.4085036e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.03           |
| explained_variance | -0.00267       |
| fps                | 23             |
| n_updates          | 1278           |
| policy_entropy     | 2.2478564      |
| policy_loss        | -0.00013902783 |
| se

Stage done
---------------------------------------
| approxkl           | 5.2209293e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.18e+03       |
| ep_reward_mean     | 4.03           |
| explained_variance | -0.00118       |
| fps                | 9              |
| n_updates          | 1291           |
| policy_entropy     | 2.241493       |
| policy_loss        | -3.6112964e-05 |
| serial_timesteps   | 165248         |
| time_elapsed       | 1.04e+04       |
| total_timesteps    | 165248         |
| value_loss         | 0.008294742    |
---------------------------------------
--------------------------------------
| approxkl           | 3.4335943e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.18e+03      |
| ep_reward_mean     | 4.03          |
| explained_variance | -0.00156      |
| fps                | 23            |
| n_updates          | 1292          |
| policy_entropy     | 2.242726      |
| policy_loss        | -3.157556e-05 |

--------------------------------------
| approxkl           | 4.570422e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 3.97          |
| explained_variance | 0.00712       |
| fps                | 23            |
| n_updates          | 1305          |
| policy_entropy     | 2.201032      |
| policy_loss        | -0.0004366748 |
| serial_timesteps   | 167040        |
| time_elapsed       | 1.05e+04      |
| total_timesteps    | 167040        |
| value_loss         | 0.0033693146  |
--------------------------------------
Stage done
--------------------------------------
| approxkl           | 6.766565e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.2e+03       |
| ep_reward_mean     | 3.97          |
| explained_variance | 0.00911       |
| fps                | 9             |
| n_updates          | 1306          |
| policy_entropy     | 2.2007465     |
| policy_loss        | -0.0002678912 |
| serial_times

---------------------------------------
| approxkl           | 8.0909115e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.00246       |
| fps                | 23             |
| n_updates          | 1319           |
| policy_entropy     | 2.2525682      |
| policy_loss        | -0.00016168691 |
| serial_timesteps   | 168832         |
| time_elapsed       | 1.06e+04       |
| total_timesteps    | 168832         |
| value_loss         | 0.0050790273   |
---------------------------------------
---------------------------------------
| approxkl           | 1.421753e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.00123       |
| fps                | 24             |
| n_updates          | 1320           |
| policy_entropy     | 2.2522423      |
| policy_loss        | -0.00012952462 |


---------------------------------------
| approxkl           | 8.147575e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.97           |
| explained_variance | 0.000483       |
| fps                | 24             |
| n_updates          | 1333           |
| policy_entropy     | 2.2254243      |
| policy_loss        | -0.00011689961 |
| serial_timesteps   | 170624         |
| time_elapsed       | 1.07e+04       |
| total_timesteps    | 170624         |
| value_loss         | 0.0050686374   |
---------------------------------------
Round done
---------------------------------------
| approxkl           | 9.851751e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.2e+03        |
| ep_reward_mean     | 3.97           |
| explained_variance | 0.000277       |
| fps                | 11             |
| n_updates          | 1334           |
| policy_entropy     | 2.2251348      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 2.335901e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | -0.00403       |
| fps                | 23             |
| n_updates          | 1347           |
| policy_entropy     | 2.174862       |
| policy_loss        | -0.00011547282 |
| serial_timesteps   | 172416         |
| time_elapsed       | 1.08e+04       |
| total_timesteps    | 172416         |
| value_loss         | 0.005904516    |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 9.4158804e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | -0.00802      |
| fps                | 9             |
| n_updates          | 1348          |
| policy_entropy     | 2.187665      |
| policy_loss        | 2.006255e-05  |

--------------------------------------
| approxkl           | 7.8212537e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.99          |
| explained_variance | -0.0437       |
| fps                | 24            |
| n_updates          | 1361          |
| policy_entropy     | 2.1190183     |
| policy_loss        | -0.0002263356 |
| serial_timesteps   | 174208        |
| time_elapsed       | 1.09e+04      |
| total_timesteps    | 174208        |
| value_loss         | 0.0064003533  |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 9.915577e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.99          |
| explained_variance | -0.0144       |
| fps                | 10            |
| n_updates          | 1362          |
| policy_entropy     | 2.1708796     |
| policy_loss        | 5.7885423e-05 |
| serial_times

Round done
---------------------------------------
| approxkl           | 6.061281e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.99           |
| explained_variance | 0.000643       |
| fps                | 10             |
| n_updates          | 1375           |
| policy_entropy     | 2.2575994      |
| policy_loss        | -3.8830563e-05 |
| serial_timesteps   | 176000         |
| time_elapsed       | 1.1e+04        |
| total_timesteps    | 176000         |
| value_loss         | 0.0032665366   |
---------------------------------------
--------------------------------------
| approxkl           | 1.2027597e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.22e+03      |
| ep_reward_mean     | 3.99          |
| explained_variance | -0.00233      |
| fps                | 23            |
| n_updates          | 1376          |
| policy_entropy     | 2.2574806     |
| policy_loss        | -7.617846e-05 |

Round done
----------------------------------------
| approxkl           | 1.0338911e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.21e+03        |
| ep_reward_mean     | 3.98            |
| explained_variance | 0.00262         |
| fps                | 11              |
| n_updates          | 1389            |
| policy_entropy     | 2.2337627       |
| policy_loss        | -0.000110052526 |
| serial_timesteps   | 177792          |
| time_elapsed       | 1.11e+04        |
| total_timesteps    | 177792          |
| value_loss         | 0.0038854922    |
----------------------------------------
---------------------------------------
| approxkl           | 8.478484e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.98           |
| explained_variance | -0.00265       |
| fps                | 24             |
| n_updates          | 1390           |
| policy_entropy     | 2.234899       |
| policy_loss 

---------------------------------------
| approxkl           | 2.4354822e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.94           |
| explained_variance | 0.0103         |
| fps                | 24             |
| n_updates          | 1403           |
| policy_entropy     | 2.22337        |
| policy_loss        | -0.00012545474 |
| serial_timesteps   | 179584         |
| time_elapsed       | 1.12e+04       |
| total_timesteps    | 179584         |
| value_loss         | 0.0024038875   |
---------------------------------------
Stage done
--------------------------------------
| approxkl           | 9.913012e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 3.94          |
| explained_variance | 0.00285       |
| fps                | 9             |
| n_updates          | 1404          |
| policy_entropy     | 2.2276192     |
| policy_loss        | -7.591769e-05 |

--------------------------------------
| approxkl           | 2.2721001e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 3.94          |
| explained_variance | 0.000369      |
| fps                | 23            |
| n_updates          | 1417          |
| policy_entropy     | 2.2571645     |
| policy_loss        | -0.0002307389 |
| serial_timesteps   | 181376        |
| time_elapsed       | 1.13e+04      |
| total_timesteps    | 181376        |
| value_loss         | 0.008315975   |
--------------------------------------
---------------------------------------
| approxkl           | 9.988762e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.94           |
| explained_variance | -0.000651      |
| fps                | 24             |
| n_updates          | 1418           |
| policy_entropy     | 2.2593122      |
| policy_loss        | 0.000115763396 |
| serial_timest

---------------------------------------
| approxkl           | 3.312003e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.00043       |
| fps                | 24             |
| n_updates          | 1431           |
| policy_entropy     | 2.2202158      |
| policy_loss        | -0.00023412891 |
| serial_timesteps   | 183168         |
| time_elapsed       | 1.15e+04       |
| total_timesteps    | 183168         |
| value_loss         | 0.0055769617   |
---------------------------------------
--------------------------------------
| approxkl           | 2.4269589e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 3.97          |
| explained_variance | 0.00309       |
| fps                | 24            |
| n_updates          | 1432          |
| policy_entropy     | 2.2195406     |
| policy_loss        | -9.333342e-05 |
| serial_t

---------------------------------------
| approxkl           | 1.1443058e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | 4.84e-05       |
| fps                | 24             |
| n_updates          | 1445           |
| policy_entropy     | 2.239666       |
| policy_loss        | -3.3156015e-05 |
| serial_timesteps   | 184960         |
| time_elapsed       | 1.16e+04       |
| total_timesteps    | 184960         |
| value_loss         | 0.004811833    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 8.765504e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.00054       |
| fps                | 9              |
| n_updates          | 1446           |
| policy_entropy     | 2.2414246      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 7.490485e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.19e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.000462      |
| fps                | 24             |
| n_updates          | 1459           |
| policy_entropy     | 2.2452464      |
| policy_loss        | -8.7842345e-06 |
| serial_timesteps   | 186752         |
| time_elapsed       | 1.17e+04       |
| total_timesteps    | 186752         |
| value_loss         | 0.006513678    |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 2.4388823e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.19e+03      |
| ep_reward_mean     | 3.97          |
| explained_variance | -0.000438     |
| fps                | 11            |
| n_updates          | 1460          |
| policy_entropy     | 2.2430525     |
| policy_loss        | -0.0003045164 |

---------------------------------------
| approxkl           | 8.915524e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | -0.000384      |
| fps                | 23             |
| n_updates          | 1473           |
| policy_entropy     | 2.2127285      |
| policy_loss        | -0.00014922768 |
| serial_timesteps   | 188544         |
| time_elapsed       | 1.18e+04       |
| total_timesteps    | 188544         |
| value_loss         | 0.0055174325   |
---------------------------------------
--------------------------------------
| approxkl           | 2.4563617e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.02          |
| explained_variance | 0.000791      |
| fps                | 24            |
| n_updates          | 1474          |
| policy_entropy     | 2.2133899     |
| policy_loss        | -0.0003334619 |
| serial_t

Stage done
---------------------------------------
| approxkl           | 8.2980773e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.99           |
| explained_variance | -0.0109        |
| fps                | 9              |
| n_updates          | 1487           |
| policy_entropy     | 2.1579766      |
| policy_loss        | -5.9075654e-05 |
| serial_timesteps   | 190336         |
| time_elapsed       | 1.19e+04       |
| total_timesteps    | 190336         |
| value_loss         | 0.005095344    |
---------------------------------------
---------------------------------------
| approxkl           | 7.6001163e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.99           |
| explained_variance | 0.00145        |
| fps                | 24             |
| n_updates          | 1488           |
| policy_entropy     | 2.1631994      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 6.559561e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.99           |
| explained_variance | -0.00113       |
| fps                | 24             |
| n_updates          | 1501           |
| policy_entropy     | 2.1993551      |
| policy_loss        | -6.0085207e-05 |
| serial_timesteps   | 192128         |
| time_elapsed       | 1.2e+04        |
| total_timesteps    | 192128         |
| value_loss         | 0.0060586603   |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 6.329332e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.99           |
| explained_variance | -3.16e-05      |
| fps                | 9              |
| n_updates          | 1502           |
| policy_entropy     | 2.1999815      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 9.535542e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.000591      |
| fps                | 24             |
| n_updates          | 1515           |
| policy_entropy     | 2.1639447      |
| policy_loss        | -0.00018810853 |
| serial_timesteps   | 193920         |
| time_elapsed       | 1.21e+04       |
| total_timesteps    | 193920         |
| value_loss         | 0.0058950656   |
---------------------------------------
---------------------------------------
| approxkl           | 7.596381e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | 0.0154         |
| fps                | 24             |
| n_updates          | 1516           |
| policy_entropy     | 2.1868298      |
| policy_loss        | -0.00031083822 |


Round done
-------------------------------------
| approxkl           | 5.416963e-07 |
| clipfrac           | 0.0          |
| ep_len_mean        | 3.22e+03     |
| ep_reward_mean     | 3.97         |
| explained_variance | -0.00018     |
| fps                | 10           |
| n_updates          | 1529         |
| policy_entropy     | 2.2354162    |
| policy_loss        | 6.167218e-06 |
| serial_timesteps   | 195712       |
| time_elapsed       | 1.22e+04     |
| total_timesteps    | 195712       |
| value_loss         | 0.008412724  |
-------------------------------------
---------------------------------------
| approxkl           | 6.647638e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.22e+03       |
| ep_reward_mean     | 3.97           |
| explained_variance | -0.000476      |
| fps                | 23             |
| n_updates          | 1530           |
| policy_entropy     | 2.2363038      |
| policy_loss        | -0.00016640872 |
| serial_timesteps 

--------------------------------------
| approxkl           | 4.0173545e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | 0.00941       |
| fps                | 24            |
| n_updates          | 1543          |
| policy_entropy     | 2.076193      |
| policy_loss        | -8.319691e-05 |
| serial_timesteps   | 197504        |
| time_elapsed       | 1.23e+04      |
| total_timesteps    | 197504        |
| value_loss         | 0.002342968   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 5.843252e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | 0.0289        |
| fps                | 11            |
| n_updates          | 1544          |
| policy_entropy     | 2.1300302     |
| policy_loss        | 3.9428473e-05 |
| serial_times

Stage done
----------------------------------------
| approxkl           | 1.0923436e-06   |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.23e+03        |
| ep_reward_mean     | 4.01            |
| explained_variance | 0.000866        |
| fps                | 9               |
| n_updates          | 1557            |
| policy_entropy     | 2.1931233       |
| policy_loss        | -0.000104924664 |
| serial_timesteps   | 199296          |
| time_elapsed       | 1.24e+04        |
| total_timesteps    | 199296          |
| value_loss         | 0.009256841     |
----------------------------------------
---------------------------------------
| approxkl           | 2.1629512e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.23e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | 0.000742       |
| fps                | 24             |
| n_updates          | 1558           |
| policy_entropy     | 2.195815       |
| policy_loss 

--------------------------------------
| approxkl           | 3.7330706e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.95          |
| explained_variance | 0.00389       |
| fps                | 24            |
| n_updates          | 1571          |
| policy_entropy     | 2.1983783     |
| policy_loss        | 1.5558675e-05 |
| serial_timesteps   | 201088        |
| time_elapsed       | 1.25e+04      |
| total_timesteps    | 201088        |
| value_loss         | 0.0037364294  |
--------------------------------------
--------------------------------------
| approxkl           | 1.7977799e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.23e+03      |
| ep_reward_mean     | 3.95          |
| explained_variance | -0.00631      |
| fps                | 24            |
| n_updates          | 1572          |
| policy_entropy     | 2.2052402     |
| policy_loss        | -0.00014843   |
| serial_timesteps   | 20

--------------------------------------
| approxkl           | 6.1004664e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 3.9           |
| explained_variance | 0.000949      |
| fps                | 23            |
| n_updates          | 1585          |
| policy_entropy     | 2.196606      |
| policy_loss        | -0.000163286  |
| serial_timesteps   | 202880        |
| time_elapsed       | 1.27e+04      |
| total_timesteps    | 202880        |
| value_loss         | 0.0027976288  |
--------------------------------------
---------------------------------------
| approxkl           | 1.1008912e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.9            |
| explained_variance | 0.00794        |
| fps                | 22             |
| n_updates          | 1586           |
| policy_entropy     | 2.2013288      |
| policy_loss        | -0.00022559136 |
| serial_timest

Stage done
---------------------------------------
| approxkl           | 5.158524e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.9            |
| explained_variance | 0.00034        |
| fps                | 9              |
| n_updates          | 1599           |
| policy_entropy     | 2.2057817      |
| policy_loss        | -0.00023691542 |
| serial_timesteps   | 204672         |
| time_elapsed       | 1.28e+04       |
| total_timesteps    | 204672         |
| value_loss         | 0.008891102    |
---------------------------------------
---------------------------------------
| approxkl           | 4.7858275e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.9            |
| explained_variance | -0.000505      |
| fps                | 23             |
| n_updates          | 1600           |
| policy_entropy     | 2.2100797      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 4.4292625e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 3.91          |
| explained_variance | 0.0135        |
| fps                | 24            |
| n_updates          | 1613          |
| policy_entropy     | 2.2013774     |
| policy_loss        | 2.6710331e-06 |
| serial_timesteps   | 206464        |
| time_elapsed       | 1.29e+04      |
| total_timesteps    | 206464        |
| value_loss         | 0.0018896562  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 4.6869002e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.91           |
| explained_variance | -0.00281       |
| fps                | 11             |
| n_updates          | 1614           |
| policy_entropy     | 2.2038467      |
| policy_loss        | -0.00011360273 |
| se

---------------------------------------
| approxkl           | 4.0988607e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.91           |
| explained_variance | 0.00153        |
| fps                | 24             |
| n_updates          | 1627           |
| policy_entropy     | 2.2277803      |
| policy_loss        | -0.00014331192 |
| serial_timesteps   | 208256         |
| time_elapsed       | 1.3e+04        |
| total_timesteps    | 208256         |
| value_loss         | 0.0064167334   |
---------------------------------------
--------------------------------------
| approxkl           | 1.4756707e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 3.91          |
| explained_variance | -0.000211     |
| fps                | 24            |
| n_updates          | 1628          |
| policy_entropy     | 2.2297533     |
| policy_loss        | 6.337091e-05  |
| serial_t

--------------------------------------
| approxkl           | 1.8282353e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.21e+03      |
| ep_reward_mean     | 3.91          |
| explained_variance | -8.73e-05     |
| fps                | 24            |
| n_updates          | 1641          |
| policy_entropy     | 2.2258897     |
| policy_loss        | 4.6860427e-05 |
| serial_timesteps   | 210048        |
| time_elapsed       | 1.31e+04      |
| total_timesteps    | 210048        |
| value_loss         | 0.002977052   |
--------------------------------------
---------------------------------------
| approxkl           | 8.377806e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.21e+03       |
| ep_reward_mean     | 3.91           |
| explained_variance | 0.00388        |
| fps                | 24             |
| n_updates          | 1642           |
| policy_entropy     | 2.2259452      |
| policy_loss        | -4.3759122e-05 |
| serial_timest

Round done
--------------------------------------
| approxkl           | 9.247489e-06  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 3.95          |
| explained_variance | -0.00199      |
| fps                | 10            |
| n_updates          | 1655          |
| policy_entropy     | 2.2409203     |
| policy_loss        | 2.1059066e-05 |
| serial_timesteps   | 211840        |
| time_elapsed       | 1.32e+04      |
| total_timesteps    | 211840        |
| value_loss         | 0.010262457   |
--------------------------------------
---------------------------------------
| approxkl           | 3.7042848e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 3.95           |
| explained_variance | -0.000432      |
| fps                | 24             |
| n_updates          | 1656           |
| policy_entropy     | 2.2476184      |
| policy_loss        | -0.00020424975 |
| se

--------------------------------------
| approxkl           | 1.9478907e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 3.95          |
| explained_variance | 0.000473      |
| fps                | 24            |
| n_updates          | 1669          |
| policy_entropy     | 2.2512927     |
| policy_loss        | -1.023151e-05 |
| serial_timesteps   | 213632        |
| time_elapsed       | 1.33e+04      |
| total_timesteps    | 213632        |
| value_loss         | 0.004830043   |
--------------------------------------
---------------------------------------
| approxkl           | 9.442874e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 3.95           |
| explained_variance | 0.00493        |
| fps                | 24             |
| n_updates          | 1670           |
| policy_entropy     | 2.253735       |
| policy_loss        | -4.8716553e-05 |
| serial_timest

Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start
--------------------------------------
| approxkl           | 4.942891e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | 0.434         |
| fps                | 8             |
| n_updates          | 1683          |
| policy_entropy     | 2.0938463     |
| policy_loss        | -6.485358e-05 |
| serial_timesteps   | 215424        |
| time_elapsed       | 1.34e+04      |
| total_timesteps    | 215424        |
| value_loss         | 0.009742226   |
--------------------------------------
---------------------------------------
| approxkl           | 1.916394e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | 0.0594         |
| fps                | 24             |
| n_updates          | 1684           |
| policy_entropy     |

Round done
---------------------------------------
| approxkl           | 9.293393e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | -0.00062       |
| fps                | 12             |
| n_updates          | 1697           |
| policy_entropy     | 2.238793       |
| policy_loss        | -0.00051903725 |
| serial_timesteps   | 217216         |
| time_elapsed       | 1.35e+04       |
| total_timesteps    | 217216         |
| value_loss         | 0.0066659465   |
---------------------------------------
---------------------------------------
| approxkl           | 9.761956e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | -0.00232       |
| fps                | 24             |
| n_updates          | 1698           |
| policy_entropy     | 2.23347        |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 1.0838706e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | -0.00372      |
| fps                | 24            |
| n_updates          | 1711          |
| policy_entropy     | 2.1863768     |
| policy_loss        | 7.091835e-05  |
| serial_timesteps   | 219008        |
| time_elapsed       | 1.36e+04      |
| total_timesteps    | 219008        |
| value_loss         | 0.0032126568  |
--------------------------------------
Stage done
---------------------------------------
| approxkl           | 8.752087e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.01           |
| explained_variance | 0.00618        |
| fps                | 9              |
| n_updates          | 1712           |
| policy_entropy     | 2.1907115      |
| policy_loss        | -0.00017202133 |
| se

Stage done
--------------------------------------
| approxkl           | 4.0044716e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | 0.000645      |
| fps                | 9             |
| n_updates          | 1725          |
| policy_entropy     | 2.1898253     |
| policy_loss        | -6.626174e-05 |
| serial_timesteps   | 220800        |
| time_elapsed       | 1.38e+04      |
| total_timesteps    | 220800        |
| value_loss         | 0.0027535604  |
--------------------------------------
--------------------------------------
| approxkl           | 4.203731e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.01          |
| explained_variance | -0.00319      |
| fps                | 23            |
| n_updates          | 1726          |
| policy_entropy     | 2.194033      |
| policy_loss        | -8.413568e-05 |
| serial_times

---------------------------------------
| approxkl           | 4.3952156e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | -0.00836       |
| fps                | 24             |
| n_updates          | 1739           |
| policy_entropy     | 2.14113        |
| policy_loss        | -7.9821795e-05 |
| serial_timesteps   | 222592         |
| time_elapsed       | 1.39e+04       |
| total_timesteps    | 222592         |
| value_loss         | 0.009948233    |
---------------------------------------
Stage done
---------------------------------------
| approxkl           | 5.57503e-07    |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | 0.00898        |
| fps                | 9              |
| n_updates          | 1740           |
| policy_entropy     | 2.1470609      |
| policy_loss        | -0.000

---------------------------------------
| approxkl           | 4.3325855e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | -0.000865      |
| fps                | 23             |
| n_updates          | 1753           |
| policy_entropy     | 2.1896899      |
| policy_loss        | -2.5874004e-05 |
| serial_timesteps   | 224384         |
| time_elapsed       | 1.4e+04        |
| total_timesteps    | 224384         |
| value_loss         | 0.009819788    |
---------------------------------------
--------------------------------------
| approxkl           | 2.9947654e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.26e+03      |
| ep_reward_mean     | 4.02          |
| explained_variance | -0.000463     |
| fps                | 23            |
| n_updates          | 1754          |
| policy_entropy     | 2.1905446     |
| policy_loss        | 3.444869e-05  |
| serial_t

Stage done
---------------------------------------
| approxkl           | 1.5106808e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | 0.00599        |
| fps                | 9              |
| n_updates          | 1767           |
| policy_entropy     | 2.1897683      |
| policy_loss        | -0.00010924041 |
| serial_timesteps   | 226176         |
| time_elapsed       | 1.41e+04       |
| total_timesteps    | 226176         |
| value_loss         | 0.009751389    |
---------------------------------------
---------------------------------------
| approxkl           | 1.1091879e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.26e+03       |
| ep_reward_mean     | 4.02           |
| explained_variance | -0.000261      |
| fps                | 24             |
| n_updates          | 1768           |
| policy_entropy     | 2.1945238      |
| policy_loss        | -5.086

Stage done
---------------------------------------
| approxkl           | 3.724886e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | 0.00389        |
| fps                | 9              |
| n_updates          | 1781           |
| policy_entropy     | 2.2132454      |
| policy_loss        | -0.00010710582 |
| serial_timesteps   | 227968         |
| time_elapsed       | 1.42e+04       |
| total_timesteps    | 227968         |
| value_loss         | 0.0077116587   |
---------------------------------------
---------------------------------------
| approxkl           | 5.9492356e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | 0.00163        |
| fps                | 24             |
| n_updates          | 1782           |
| policy_entropy     | 2.2165947      |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 7.71509e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.24e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | 0.00168       |
| fps                | 24            |
| n_updates          | 1795          |
| policy_entropy     | 2.2412894     |
| policy_loss        | -0.0001508072 |
| serial_timesteps   | 229760        |
| time_elapsed       | 1.43e+04      |
| total_timesteps    | 229760        |
| value_loss         | 0.0068809763  |
--------------------------------------
Round done
---------------------------------------
| approxkl           | 8.2462196e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.24e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | -0.000102      |
| fps                | 10             |
| n_updates          | 1796           |
| policy_entropy     | 2.241702       |
| policy_loss        | -0.00018607825 |
| se

---------------------------------------
| approxkl           | 7.0041375e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | -0.00516       |
| fps                | 24             |
| n_updates          | 1809           |
| policy_entropy     | 2.2064567      |
| policy_loss        | -6.0006976e-05 |
| serial_timesteps   | 231552         |
| time_elapsed       | 1.44e+04       |
| total_timesteps    | 231552         |
| value_loss         | 0.005709054    |
---------------------------------------
----------------------------------------
| approxkl           | 8.842861e-07    |
| clipfrac           | 0.0             |
| ep_len_mean        | 3.25e+03        |
| ep_reward_mean     | 4.04            |
| explained_variance | -0.00172        |
| fps                | 24              |
| n_updates          | 1810            |
| policy_entropy     | 2.21602         |
| policy_loss        | -0.00011

---------------------------------------
| approxkl           | 8.721964e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | 0.00139        |
| fps                | 24             |
| n_updates          | 1823           |
| policy_entropy     | 2.237253       |
| policy_loss        | -0.00015477464 |
| serial_timesteps   | 233344         |
| time_elapsed       | 1.45e+04       |
| total_timesteps    | 233344         |
| value_loss         | 0.0037760173   |
---------------------------------------
Round done
--------------------------------------
| approxkl           | 4.9965365e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | -0.000361     |
| fps                | 11            |
| n_updates          | 1824          |
| policy_entropy     | 2.2392502     |
| policy_loss        | -6.261468e-05 |

--------------------------------------
| approxkl           | 2.9324312e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.25e+03      |
| ep_reward_mean     | 4.04          |
| explained_variance | 0.000366      |
| fps                | 23            |
| n_updates          | 1837          |
| policy_entropy     | 2.2211547     |
| policy_loss        | 6.394833e-05  |
| serial_timesteps   | 235136        |
| time_elapsed       | 1.46e+04      |
| total_timesteps    | 235136        |
| value_loss         | 0.0041139675  |
--------------------------------------
---------------------------------------
| approxkl           | 1.8517813e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.25e+03       |
| ep_reward_mean     | 4.04           |
| explained_variance | -0.000698      |
| fps                | 21             |
| n_updates          | 1838           |
| policy_entropy     | 2.218696       |
| policy_loss        | -0.00022768229 |
| serial_timest

Stage done
---------------------------------------
| approxkl           | 6.971079e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.05           |
| explained_variance | 0.00975        |
| fps                | 10             |
| n_updates          | 1851           |
| policy_entropy     | 2.1751199      |
| policy_loss        | -0.00019111484 |
| serial_timesteps   | 236928         |
| time_elapsed       | 1.47e+04       |
| total_timesteps    | 236928         |
| value_loss         | 0.005575013    |
---------------------------------------
---------------------------------------
| approxkl           | 1.8338586e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.05           |
| explained_variance | -0.00205       |
| fps                | 24             |
| n_updates          | 1852           |
| policy_entropy     | 2.179516       |
| policy_loss        | -0.000

--------------------------------------
| approxkl           | 5.6497784e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.05          |
| explained_variance | -0.000637     |
| fps                | 22            |
| n_updates          | 1865          |
| policy_entropy     | 2.1681716     |
| policy_loss        | 2.916716e-05  |
| serial_timesteps   | 238720        |
| time_elapsed       | 1.49e+04      |
| total_timesteps    | 238720        |
| value_loss         | 0.0043288055  |
--------------------------------------
---------------------------------------
| approxkl           | 3.872674e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.05           |
| explained_variance | -0.000327      |
| fps                | 22             |
| n_updates          | 1866           |
| policy_entropy     | 2.1642952      |
| policy_loss        | -0.00026170537 |
| serial_timest

Round done
---------------------------------------
| approxkl           | 9.860737e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | 0.000193       |
| fps                | 10             |
| n_updates          | 1879           |
| policy_entropy     | 2.129657       |
| policy_loss        | -3.4597702e-05 |
| serial_timesteps   | 240512         |
| time_elapsed       | 1.5e+04        |
| total_timesteps    | 240512         |
| value_loss         | 0.01751902     |
---------------------------------------
--------------------------------------
| approxkl           | 7.61474e-07   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | 0.00545       |
| fps                | 21            |
| n_updates          | 1880          |
| policy_entropy     | 2.128581      |
| policy_loss        | 2.5123358e-05 |

Round done
---------------------------------------
| approxkl           | 6.6240655e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.06           |
| explained_variance | 0.00042        |
| fps                | 10             |
| n_updates          | 1893           |
| policy_entropy     | 2.1433997      |
| policy_loss        | -8.6173415e-05 |
| serial_timesteps   | 242304         |
| time_elapsed       | 1.51e+04       |
| total_timesteps    | 242304         |
| value_loss         | 0.00788769     |
---------------------------------------
--------------------------------------
| approxkl           | 2.0872935e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.06          |
| explained_variance | -0.000335     |
| fps                | 22            |
| n_updates          | 1894          |
| policy_entropy     | 2.1476166     |
| policy_loss        | -3.652554e-05 |

---------------------------------------
| approxkl           | 1.0063865e-06  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | -0.00287       |
| fps                | 22             |
| n_updates          | 1907           |
| policy_entropy     | 2.1396425      |
| policy_loss        | -0.00010471605 |
| serial_timesteps   | 244096         |
| time_elapsed       | 1.52e+04       |
| total_timesteps    | 244096         |
| value_loss         | 0.004975272    |
---------------------------------------
---------------------------------------
| approxkl           | 8.062798e-07   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.08           |
| explained_variance | 0.00405        |
| fps                | 22             |
| n_updates          | 1908           |
| policy_entropy     | 2.1413512      |
| policy_loss        | -1.9028783e-05 |


Round done
--------------------------------------
| approxkl           | 1.1882283e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | 0.0136        |
| fps                | 11            |
| n_updates          | 1921          |
| policy_entropy     | 2.158764      |
| policy_loss        | 5.6836754e-05 |
| serial_timesteps   | 245888        |
| time_elapsed       | 1.53e+04      |
| total_timesteps    | 245888        |
| value_loss         | 0.0011366187  |
--------------------------------------
--------------------------------------
| approxkl           | 5.4829246e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | -0.0087       |
| fps                | 24            |
| n_updates          | 1922          |
| policy_entropy     | 2.171906      |
| policy_loss        | -6.841123e-05 |
| serial_times

--------------------------------------
| approxkl           | 1.3944173e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | -0.000424     |
| fps                | 21            |
| n_updates          | 1935          |
| policy_entropy     | 2.206231      |
| policy_loss        | 3.5137404e-05 |
| serial_timesteps   | 247680        |
| time_elapsed       | 1.54e+04      |
| total_timesteps    | 247680        |
| value_loss         | 0.010391757   |
--------------------------------------
Round done
--------------------------------------
| approxkl           | 3.613781e-07  |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.08          |
| explained_variance | 0.000182      |
| fps                | 10            |
| n_updates          | 1936          |
| policy_entropy     | 2.2075353     |
| policy_loss        | 7.3611736e-05 |
| serial_times

Round done
--------------------------------------
| approxkl           | 1.7125823e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.00181       |
| fps                | 10            |
| n_updates          | 1949          |
| policy_entropy     | 2.203922      |
| policy_loss        | -8.925982e-05 |
| serial_timesteps   | 249472        |
| time_elapsed       | 1.55e+04      |
| total_timesteps    | 249472        |
| value_loss         | 0.005796832   |
--------------------------------------
--------------------------------------
| approxkl           | 1.15398e-06   |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.00448      |
| fps                | 21            |
| n_updates          | 1950          |
| policy_entropy     | 2.207725      |
| policy_loss        | -7.987581e-05 |
| serial_times

Stage done
---------------------------------------
| approxkl           | 1.983541e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.27e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | 0.000552       |
| fps                | 9              |
| n_updates          | 1963           |
| policy_entropy     | 2.2397065      |
| policy_loss        | -0.00019458309 |
| serial_timesteps   | 251264         |
| time_elapsed       | 1.57e+04       |
| total_timesteps    | 251264         |
| value_loss         | 0.007431876    |
---------------------------------------
--------------------------------------
| approxkl           | 1.2636594e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.27e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.000875      |
| fps                | 23            |
| n_updates          | 1964          |
| policy_entropy     | 2.2372293     |
| policy_loss        | 1.6892329e-05 |

Round done
--------------------------------------
| approxkl           | 1.3666404e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | -0.00255      |
| fps                | 11            |
| n_updates          | 1977          |
| policy_entropy     | 2.1998544     |
| policy_loss        | -7.075677e-05 |
| serial_timesteps   | 253056        |
| time_elapsed       | 1.58e+04      |
| total_timesteps    | 253056        |
| value_loss         | 0.010657747   |
--------------------------------------
---------------------------------------
| approxkl           | 7.8705045e-07  |
| clipfrac           | 0.0            |
| ep_len_mean        | 3.28e+03       |
| ep_reward_mean     | 4.1            |
| explained_variance | -0.00435       |
| fps                | 22             |
| n_updates          | 1978           |
| policy_entropy     | 2.2067444      |
| policy_loss        | -5.4568052e-05 |
| se

--------------------------------------
| approxkl           | 2.0225824e-07 |
| clipfrac           | 0.0           |
| ep_len_mean        | 3.28e+03      |
| ep_reward_mean     | 4.1           |
| explained_variance | 0.00228       |
| fps                | 24            |
| n_updates          | 1991          |
| policy_entropy     | 2.193215      |
| policy_loss        | -3.297627e-05 |
| serial_timesteps   | 254848        |
| time_elapsed       | 1.59e+04      |
| total_timesteps    | 254848        |
| value_loss         | 0.0061155716  |
--------------------------------------
Episode done
Restarting game
Selecting Kasumi
Waiting for fight to start


In [None]:
# Persist the trained agent inside the model folder configured at the top of the notebook.
# `modelFolder` already ends with a path separator, so plain concatenation yields the full path.
agentCheckpointPath = modelFolder + "4_3Msteps_action+_add"
model.save(agentCheckpointPath)

In [None]:
%tensorboard --logdir ./tensorBoardFolder/

In [None]:
# Interactive step-through of the trained agent.
# At every step the policy's action distribution, its arg-max and the sampled action are
# printed, then the loop waits at the "Pausa" prompt: press Enter to take the step, or
# answer "q" to leave the loop so that the environment gets closed.
observation = env.reset()
states = None

while True:

    # predict() returns the *updated* recurrent state. The probabilities must be queried
    # with the state the action was sampled from, otherwise the same observation is fed
    # through the LSTM a second time and the printed distribution belongs to another step.
    prevStates = states
    action, states = model.predict(observation, prevStates, deterministic=False)
    action_prob = model.action_probability(observation, prevStates)
    print("Action probabilities = ", action_prob)
    print("Max action = ", np.argmax(action_prob))
    print("Action = ", action)
    # Plain Enter keeps stepping exactly as before; "q" is the only way out of the loop
    # other than interrupting the kernel.
    if input("Pausa").strip().lower() == "q":
        break

    observation, reward, done, info = env.step(action)
    # np.any() accepts both a scalar flag and an array of flags
    # (same check as the evaluation cell below).
    if np.any(done):
        observation = env.reset()
        states = None  # start the next episode from the initial recurrent state

env.close()

In [None]:
# Evaluate the trained agent over `maxNumEp` episodes with stochastic actions and report
# the mean and standard deviation of the cumulative reward per episode.
observation = env.reset()
states = None

cumulativeEpRew = 0.0      # reward accumulated in the episode currently being played
cumulativeEpRewAll = []    # one cumulative reward per finished episode
cumulativeTotRew = 0.0     # sum of the cumulative rewards of all finished episodes

maxNumEp = 100
currNumEp = 0

while currNumEp < maxNumEp:

    # predict() returns the *updated* recurrent state. The probabilities must be queried
    # with the state the action was sampled from, otherwise the same observation is fed
    # through the LSTM a second time and the printed distribution belongs to another step.
    prevStates = states
    action, states = model.predict(observation, prevStates, deterministic=False)
    action_prob = model.action_probability(observation, prevStates)
    print("Action probabilities = ", action_prob)
    print("Max action = ", np.argmax(action_prob))
    print("Action = ", action)

    observation, reward, done, info = env.step(action)

    cumulativeEpRew += reward

    # np.any() accepts both a scalar flag and an array of flags
    if np.any(done):
        currNumEp += 1
        print("Ep. # = ", currNumEp)
        print("Ep. Cumulative Rew # = ", cumulativeEpRew)
        sys.stdout.flush()
        cumulativeEpRewAll.append(cumulativeEpRew)
        cumulativeTotRew += cumulativeEpRew
        cumulativeEpRew = 0.0

        observation = env.reset()
        states = None  # start the next episode from the initial recurrent state

# The first two lines report the same mean computed in two ways (running sum vs np.mean).
print("Mean cumulative reward = ", cumulativeTotRew/maxNumEp)
print("Mean cumulative reward = ", np.mean(cumulativeEpRewAll))
print("Std cumulative reward = ", np.std(cumulativeEpRewAll))

env.close()