In [1]:
%matplotlib tk

import argparse
import gym
import datetime
import os
import random
import tempfile
import numpy as np
import pickle

import ray
from ray import tune
from ray.tune.logger import Logger, UnifiedLogger, pretty_print
from ray.rllib.env.multi_agent_env import make_multi_agent
from ray.rllib.examples.models.shared_weights_model import TF2SharedWeightsModel
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.ppo import ppo, PPOTrainer, PPOTFPolicy
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.policy import PolicySpec
from environment_rllib_3d import MyEnv
from settings.initial_settings import *
from settings.reset_conditions import reset_conditions
#from modules.models import MyConv2DModel_v0B_Small_CBAM_1DConv_Share
from modules.models import DenseNetModelLarge
from tensorflow.keras.utils import plot_model
from modules.savers import save_conditions
from utility.result_env import render_env
from utility.terminate_uavsimproc import teminate_proc
import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf
import cv2
import ctypes
import warnings

# If UCAV.exe is already running, kill its process before setting anything up.
teminate_proc.UAVsimprockill(proc_name="UCAV.exe")

# Silence the NumPy / Matplotlib deprecation warnings and shorten array printing.
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 
warnings.filterwarnings('ignore', category=matplotlib.MatplotlibDeprecationWarning)
np.set_printoptions(precision=3, suppress=True)
# Root folder name for the conditions/, logs/, checkpoints/ and results/ output.
PROJECT = "UCAV"
# Numeric trial ID; TRIAL is the label used in log and checkpoint paths.
TRIAL_ID = 2
TRIAL = 'test_' + str(TRIAL_ID)
# The evaluation branch of the training loop runs when steps % EVAL_FREQ == 0.
EVAL_FREQ = 10
# When True, the trainer is restored from a saved checkpoint after it is built.
CONTINUAL = False

def custom_log_creator(custom_path, custom_str):
    """Build a logger factory that writes into a fresh directory under ``custom_path``.

    Args:
        custom_path: Parent directory for the log directories. It is created
            (including missing parents) the first time the factory is called.
        custom_str: Label placed at the start of each log directory name.

    Returns:
        A ``logger_creator(config)`` callable. Each call creates a new,
        uniquely named directory ``<custom_str>_<timestamp><random suffix>``
        inside ``custom_path`` and returns a ``UnifiedLogger`` writing to it.
    """
    # The timestamp is taken once, when the factory is built, so every logger
    # produced by the same factory shares the same prefix.
    timestr = datetime.datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    logdir_prefix = "{}_{}".format(custom_str, timestr)

    def logger_creator(config):
        # exist_ok=True removes the window between an existence check and the
        # creation, in which another process could create the directory first.
        os.makedirs(custom_path, exist_ok=True)
        logdir = tempfile.mkdtemp(prefix=logdir_prefix, dir=custom_path)
        return UnifiedLogger(config, logdir, loggers=None)

    return logger_creator

# Shut down any Ray runtime left over from a previous run of this cell, then
# start a new one.
ray.shutdown()
ray.init(ignore_reinit_error=True, log_to_driver=False)

# NOTE(review): 'my_model' is registered here, but the active `config` dict
# defined further down has no "model" entry (only the commented-out config
# contains "custom_model": "my_model"), so the registered model does not
# appear to be used by the trainer -- confirm whether that is intended.
ModelCatalog.register_custom_model('my_model', DenseNetModelLarge)

# config = {"env": MyEnv,
#           "num_workers": NUM_WORKERS,
#           "num_gpus": NUM_GPUS,
#           "num_cpus_per_worker": NUM_CPUS_PER_WORKER,
#           "num_sgd_iter": NUM_SGD_ITER,
#           "lr": LEARNING_RATE,
#           "gamma": GAMMA,  # default=0.99
#           "model": {"custom_model": "my_model"}
#           # "framework": framework
#           }  # use tensorflow 2
# Environment instance used for the observation/action spaces and for the
# evaluation rollouts.
eval_env = MyEnv({})

# One separate PPO policy per blue agent, each with an empty config override.
# (A PolicySpec(config={"gamma": ...}) variant was tried earlier and dropped.)
policies = {
    blue_id: (PPOTFPolicy, eval_env.observation_space, eval_env.action_space, {})
    for blue_id in ("blue_0", "blue_1")
}
policy_ids = [*policies]

def policy_mapping_fn(agent_id, episode, **kwargs):
    """Return the policy ID for an agent: each agent uses the policy named after it.

    ``episode`` and any extra keyword arguments are accepted but ignored.
    """
    return agent_id

# Trainer configuration (the evaluation env itself is instantiated earlier as
# `eval_env`). No GPUs and no remote workers are requested, and episodes are
# collected whole ("complete_episodes") into batches of 256 timesteps.
config = {
    "env": MyEnv,
    "num_gpus": 0,
    "num_workers": 0,
    "num_cpus_per_worker": 0,
    "num_gpus_per_worker": 0,
    "create_env_on_driver": True,
    "train_batch_size": 256,
    "batch_mode": "complete_episodes",
    "multiagent": {"policies": policies, "policy_mapping_fn": policy_mapping_fn},
}

# Save the run conditions next to the other project output. The trailing
# slash is kept on purpose: the path is passed on as a plain string.
conditions_dir = os.path.join('./' + PROJECT + '/conditions/')
# exist_ok=True avoids the check-then-create race of exists() + makedirs().
os.makedirs(conditions_dir, exist_ok=True)
save_conditions(conditions_dir)

# Note: when try_import_tf is used, building PPOTrainer() fails with a
# TensorFlow eager-mode error for reasons that are not understood yet.

# Logs for this trial go under ./<PROJECT>/logs in a timestamped directory.
trial_logger_creator = custom_log_creator(
    os.path.expanduser("./" + PROJECT + "/logs"), TRIAL)
trainer = ppo.PPOTrainer(config=config, logger_creator=trial_logger_creator)

if CONTINUAL:
    # Continual learning: resume from a checkpoint, whose path is hard-coded
    # here and must be edited to point at the checkpoint to continue from.
    model_path = PROJECT + '/checkpoints/' + TRIAL + '/checkpoint_000039/checkpoint-39'
    trainer.restore(checkpoint_path=model_path)

# models_dir = os.path.join('./' + PROJECT + '/models/')
# if not os.path.exists(models_dir):
#     os.makedirs(models_dir)
# text_name = models_dir + TRIAL + '.txt'
# with open(text_name, "w") as fp:
#     trainer.get_policy().model.base_model.summary(print_fn=lambda x: fp.write(x + "\r\n"))
# png_name = models_dir + TRIAL + '.png'
# plot_model(trainer.get_policy().model.base_model, to_file=png_name, show_shapes=True)



# Directory that receives the checkpoints of this trial.
check_point_dir = os.path.join('./' + PROJECT + '/checkpoints/', TRIAL)
# exist_ok=True avoids the check-then-create race of exists() + makedirs().
os.makedirs(check_point_dir, exist_ok=True)

2021-12-09 01:49:46,253	INFO trainer.py:723 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also want to then set `eager_tracing=True` in order to reach similar execution speed as with static-graph mode.
2021-12-09 01:49:46,255	INFO ppo.py:167 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2021-12-09 01:49:46,256	INFO trainer.py:745 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


In [None]:
#def getkey(key):
    # return 111
#    return(bool(ctypes.windll.user32.GetAsyncKeyState(key) & 0x8000))
# Training & evaluation

# Recording switch for the evaluation rollouts: the loop below writes an .mp4
# only when record_mode == 0; any other value renders without recording.
record_mode = 1

# The trailing slash is kept because results_file is built by plain string
# concatenation.
results_dir = os.path.join('./' + PROJECT + '/results/')
# exist_ok=True avoids the check-then-create race of exists() + makedirs().
os.makedirs(results_dir, exist_ok=True)
results_file = results_dir + TRIAL + '.pkl'
# Evaluation rollouts are switched off. The loop used to iterate over
# ``range(NUM_EVAL*0)``, which is always empty; this flag keeps that behaviour
# but makes it explicit. Set it to True to run NUM_EVAL rendered rollouts on
# every evaluation step.
RUN_EVAL_ROLLOUTS = False

for steps in range(10001):
    # ---- Training ----
    print(f'\n----------------- Training at steps:{steps} start! -----------------')
    # NOTE(review): this resets the separate evaluation env before every
    # training iteration; presumably it re-initialises the simulator -- confirm
    # that it is needed.
    eval_env.reset()
    results = trainer.train()
    print(pretty_print(results))
    # One checkpoint per iteration. The evaluation branch used to print the
    # results and save the checkpoint a second time for the same iteration;
    # that duplicate work is gone.
    check_point = trainer.save(checkpoint_dir=check_point_dir)

    # ---- Evaluation (every EVAL_FREQ iterations) ----
    if steps % EVAL_FREQ != 0:
        continue
    print(f'\n----------------- Evaluation at steps:{steps} starting ! -----------------')

    for i in range(NUM_EVAL if RUN_EVAL_ROLLOUTS else 0):
        obs = eval_env.reset()
        fig = plt.figure(1)

        # Per-step snapshots returned by render_env.copy_from_env for the
        # blue, red and "mrm" objects; stacked below to draw the trajectories.
        env_blue_pos = []
        env_red_pos = []
        env_mrm_pos = []

        video = None
        if record_mode == 0:
            file_name = "test_num" + str(steps) + str(i)
            # NOTE(review): `WINNDOW_SIZE_lon` (double N) is spelled differently
            # from `WINDOW_SIZE_lat` -- check the attribute name on MyEnv.
            # NOTE(review): the frames written below come from the Matplotlib
            # canvas; confirm that this size matches the canvas size.
            video = cv2.VideoWriter(file_name+'.mp4', 0x00000020, 20.0,
                                    (eval_env.WINNDOW_SIZE_lon, eval_env.WINDOW_SIZE_lat))

        try:
            while True:
                # Each blue agent acts with the policy that carries its own ID.
                action_dict = {
                    agent_id: trainer.compute_single_action(obs[agent_id], policy_id=agent_id)
                    for agent_id in ('blue_' + str(j) for j in range(eval_env.blue_num))
                }
                obs, rewards, dones, infos = eval_env.step(action_dict)

                env_blue_pos_temp, env_red_pos_temp, env_mrm_pos_temp = render_env.copy_from_env(eval_env)
                env_blue_pos.append(env_blue_pos_temp)
                env_red_pos.append(env_red_pos_temp)
                env_mrm_pos.append(env_mrm_pos_temp)

                hist_blue_pos = np.vstack(env_blue_pos)
                hist_red_pos = np.vstack(env_red_pos)
                hist_mrm_pos = np.vstack(env_mrm_pos)

                # Redraw the whole history on every step.
                plt.clf()
                render_env.rend_3d(eval_env, hist_blue_pos, "b", 1)
                render_env.rend_3d(eval_env, hist_red_pos, "r", 1)
                render_env.rend_3d(eval_env, hist_mrm_pos, "k", 1)
                fig.canvas.draw()
                plt.pause(.05)

                if video is not None:
                    # Grab the rendered canvas and append it as one video frame.
                    img = np.array(fig.canvas.renderer.buffer_rgba())
                    img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
                    video.write(img.astype('uint8'))

                # Episode termination.
                if dones['__all__']:
                    break
        finally:
            # Always close the video file, even if the rollout raises.
            if video is not None:
                video.release()

        # TODO: persist the trajectories to `results_file`. The earlier,
        # commented-out pickle code for this used the misspelled names
        # `emv_*_pos` instead of `env_*_pos`.

ray.shutdown()


----------------- Training at steps:0 start! -----------------
blue_0DOWN
LOSE
blue_0 True True 200 -2.0 -1.0049999999999992
blue_1 True True 200 0.005 1.0000000000000007
blue_1DOWN
LOSE
blue_0 True True 85 0.005 0.42500000000000027
blue_1 True True 85 -2.0 -1.5799999999999996




agent_timesteps_total: 570
custom_metrics: {}
date: 2021-12-09_01-51-01
done: false
episode_len_mean: 142.5
episode_media: {}
episode_reward_max: -0.005000000000020437
episode_reward_mean: -0.58000000000001
episode_reward_min: -1.1549999999999994
episodes_this_iter: 2
episodes_total: 2
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 4.999999873689376e-05
        entropy: 8.571272850036621
        entropy_coeff: 0.0
        kl: 0.021246980875730515
        model: {}
        policy_loss: 0.020580817013978958
        total_loss: 0.4572502672672272
        vf_explained_var: 0.13450315594673157
        vf_loss: 0.43242010474205017
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 4.999999873689376e-05
        entropy: 8.505207061767578
        entropy_coeff: 0.0
        kl: 0.0

blue_1DOWN
LOSE
blue_0 True True 141 0.005 0.7050000000000005
blue_1 True True 141 -2.0 -1.2999999999999994
blue_0DOWN
LOSE
blue_0 True True 40 -2.0 -1.805
blue_1 True True 40 0.005 0.2000000000000001
blue_1DOWN
LOSE
blue_0 True True 165 0.005 0.8250000000000006
blue_1 True True 165 -2.0 -1.1799999999999993
agent_timesteps_total: 3062
custom_metrics: {}
date: 2021-12-09_01-55-26
done: false
episode_len_mean: 139.1818181818182
episode_media: {}
episode_reward_max: 1.819999999999971
episode_reward_mean: -0.5918181818181899
episode_reward_min: -1.605
episodes_this_iter: 3
episodes_total: 11
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.30000001192092896
        cur_lr: 4.999999873689376e-05
        entropy: 8.396687507629395
        entropy_coeff: 0.0
        kl: 0.015333007089793682
        model: {}
        policy_loss: 0.042596373707056046
        total_loss: 0.5205609

blue_0DOWN
LOSE
blue_0 True True 135 -2.0 -1.3299999999999996
blue_1 True True 135 0.005 0.6750000000000005
blue_1DOWN
LOSE
blue_0 True True 96 0.005 0.4800000000000003
blue_1 True True 96 -2.0 -1.5249999999999997
blue_0DOWN
LOSE
blue_0 True True 242 -1.998 -0.7720000000000036
blue_1 True True 242 0.005 1.2099999999999962
agent_timesteps_total: 5608
custom_metrics: {}
date: 2021-12-09_01-59-46
done: false
episode_len_mean: 140.2
episode_media: {}
episode_reward_max: 1.819999999999971
episode_reward_mean: -0.5901000000000086
episode_reward_min: -1.605
episodes_this_iter: 3
episodes_total: 20
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.30000001192092896
        cur_lr: 4.999999873689376e-05
        entropy: 8.437850952148438
        entropy_coeff: 0.0
        kl: 0.01497672125697136
        model: {}
        policy_loss: -0.2831854224205017
        total_loss: -0.07643

blue_0DOWN
LOSE
blue_0 True True 136 -2.0 -1.3249999999999995
blue_1 True True 136 0.005 0.6800000000000005
blue_0DOWN
LOSE
blue_0 True True 69 -2.0 -1.6599999999999997
blue_1 True True 69 0.005 0.3450000000000002
blue_0DOWN
LOSE
blue_0 True True 62 -2.0 -1.6949999999999998
blue_1 True True 62 0.005 0.31000000000000016
agent_timesteps_total: 7862
custom_metrics: {}
date: 2021-12-09_02-03-29
done: false
episode_len_mean: 140.39285714285714
episode_media: {}
episode_reward_max: 1.819999999999971
episode_reward_mean: -0.5896785714285803
episode_reward_min: -1.605
episodes_this_iter: 3
episodes_total: 28
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.30000001192092896
        cur_lr: 4.999999873689376e-05
        entropy: 8.425164222717285
        entropy_coeff: 0.0
        kl: 0.017509035766124725
        model: {}
        policy_loss: -0.12010315805673599
        total_lo

blue_0DOWN
LOSE
blue_0 True True 88 -2.0 -1.5649999999999997
blue_1 True True 88 0.005 0.4400000000000003
blue_1DOWN
LOSE
blue_0 True True 161 0.005 0.8050000000000006
blue_1 True True 161 -2.0 -1.1999999999999993
blue_1DOWN
LOSE
blue_0 True True 139 0.005 0.6950000000000005
blue_1 True True 139 -2.0 -1.3099999999999996
agent_timesteps_total: 10306
custom_metrics: {}
date: 2021-12-09_02-06-53
done: false
episode_len_mean: 147.22857142857143
episode_media: {}
episode_reward_max: 2.581999999999922
episode_reward_mean: -0.5156857142857248
episode_reward_min: -1.605
episodes_this_iter: 3
episodes_total: 35
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 4.999999873689376e-05
        entropy: 8.244721412658691
        entropy_coeff: 0.0
        kl: 0.01834145374596119
        model: {}
        policy_loss: -0.07655520737171173
        total_los

agent_timesteps_total: 12282
custom_metrics: {}
date: 2021-12-09_02-09-36
done: false
episode_len_mean: 153.525
episode_media: {}
episode_reward_max: 2.581999999999922
episode_reward_mean: -0.45515000000001116
episode_reward_min: -1.605
episodes_this_iter: 1
episodes_total: 40
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 4.999999873689376e-05
        entropy: 8.603569030761719
        entropy_coeff: 0.0
        kl: 0.016534652560949326
        model: {}
        policy_loss: -0.11024714261293411
        total_loss: -0.036456190049648285
        vf_explained_var: -0.8656741380691528
        vf_loss: 0.0626300722360611
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 4.999999873689376e-05
        entropy: 8.618038177490234
        entropy_coeff: 0.0
        kl: 0.014885664917

blue_0DOWN
LOSE
blue_0 True True 137 -2.0 -1.3199999999999994
blue_1 True True 137 0.005 0.6850000000000005
blue_1DOWN
LOSE
blue_0 True True 387 0.005 1.9349999999999807
blue_1 True True 387 -2.0 -0.08400000000001762
agent_timesteps_total: 15594
custom_metrics: {}
date: 2021-12-09_02-13-52
done: false
episode_len_mean: 162.4375
episode_media: {}
episode_reward_max: 2.581999999999922
episode_reward_mean: -0.35920833333334606
episode_reward_min: -1.605
episodes_this_iter: 2
episodes_total: 48
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 4.999999873689376e-05
        entropy: 8.734068870544434
        entropy_coeff: 0.0
        kl: 0.01578766293823719
        model: {}
        policy_loss: -0.06452318280935287
        total_loss: 0.41196775436401367
        vf_explained_var: -0.02040175534784794
        vf_loss: 0.46583423018455505
    blu

blue_1DOWN
LOSE
blue_0 True True 92 0.005 0.4600000000000003
blue_1 True True 92 -2.0 -1.5449999999999997
blue_1DOWN
LOSE
blue_0 True True 69 0.005 0.3450000000000002
blue_1 True True 69 -2.0 -1.6599999999999997
blue_1DOWN
LOSE
blue_0 True True 171 0.005 0.8550000000000006
blue_1 True True 171 -2.0 -1.1499999999999995
agent_timesteps_total: 17554
custom_metrics: {}
date: 2021-12-09_02-16-48
done: false
episode_len_mean: 159.5818181818182
episode_media: {}
episode_reward_max: 2.581999999999922
episode_reward_mean: -0.39074545454546694
episode_reward_min: -1.605
episodes_this_iter: 3
episodes_total: 55
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 4.999999873689376e-05
        entropy: 8.186394691467285
        entropy_coeff: 0.0
        kl: 0.023703692480921745
        model: {}
        policy_loss: -0.2188088446855545
        total_loss:

LOSE
blue_0 False False 600 -0.995 1.9999999999999583
blue_1 False False 600 -0.995 2.026999999999959
agent_timesteps_total: 20010
custom_metrics: {}
date: 2021-12-09_02-20-19
done: false
episode_len_mean: 161.3709677419355
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.37441935483872263
episode_reward_min: -1.605
episodes_this_iter: 1
episodes_total: 62
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.678204536437988
        entropy_coeff: 0.0
        kl: 0.006921911612153053
        model: {}
        policy_loss: -0.3925185799598694
        total_loss: -0.3176053464412689
        vf_explained_var: -0.647697389125824
        vf_loss: 0.06790483742952347
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr:

blue_1DOWN
LOSE
blue_0 True True 262 0.005 1.309999999999994
blue_1 True True 262 -1.998 -0.45200000000000173
agent_timesteps_total: 22068
custom_metrics: {}
date: 2021-12-09_02-23-38
done: false
episode_len_mean: 157.62857142857143
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.4389428571428694
episode_reward_min: -2.9400000000000004
episodes_this_iter: 2
episodes_total: 70
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.419123649597168
        entropy_coeff: 0.0
        kl: 0.01901080645620823
        model: {}
        policy_loss: -0.20868441462516785
        total_loss: 0.12868963181972504
        vf_explained_var: 0.40314796566963196
        vf_loss: 0.3181256651878357
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.01250004768

blue_1DOWN
LOSE
blue_0 True True 274 0.005 1.3699999999999928
blue_1 True True 274 -2.0 -0.6350000000000071
agent_timesteps_total: 24084
custom_metrics: {}
date: 2021-12-09_02-26-20
done: false
episode_len_mean: 160.56
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.40894666666667956
episode_reward_min: -2.9400000000000004
episodes_this_iter: 1
episodes_total: 75
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.467756271362305
        entropy_coeff: 0.0
        kl: 0.012324532493948936
        model: {}
        policy_loss: -0.06814851611852646
        total_loss: 0.09138258546590805
        vf_explained_var: -0.9845232367515564
        vf_loss: 0.14705252647399902
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
     

blue_0DOWN
LOSE
blue_0 True True 35 -2.0 -1.8299999999999998
blue_1 True True 35 0.005 0.17500000000000007
blue_1DOWN
LOSE
blue_0 True True 379 0.005 1.8949999999999816
blue_1 True True 379 -2.0 -0.08200000000001828
agent_timesteps_total: 26800
custom_metrics: {}
date: 2021-12-09_02-30-17
done: false
episode_len_mean: 159.52380952380952
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.4179523809523938
episode_reward_min: -2.9400000000000004
episodes_this_iter: 3
episodes_total: 84
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.7182035446167
        entropy_coeff: 0.0
        kl: 0.0142704788595438
        model: {}
        policy_loss: -0.07828971743583679
        total_loss: 0.5306165814399719
        vf_explained_var: -0.4397851824760437
        vf_loss: 0.59445738

agent_timesteps_total: 29026
custom_metrics: {}
date: 2021-12-09_02-33-19
done: false
episode_len_mean: 161.25555555555556
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.400477777777791
episode_reward_min: -2.9400000000000004
episodes_this_iter: 1
episodes_total: 90
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 9.314188957214355
        entropy_coeff: 0.0
        kl: 0.007322967518121004
        model: {}
        policy_loss: -0.5401609539985657
        total_loss: -0.465212881565094
        vf_explained_var: 0.29244616627693176
        vf_loss: 0.06753359735012054
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.579607009887695
        entropy_coeff: 0.0
     

blue_1DOWN
LOSE
blue_0 True True 79 0.005 0.39500000000000024
blue_1 True True 79 -2.0 -1.6099999999999999
blue_1DOWN
LOSE
blue_0 True True 78 0.005 0.39000000000000024
blue_1 True True 78 -2.0 -1.6149999999999998
blue_1DOWN
LOSE
blue_0 True True 154 0.005 0.7700000000000006
blue_1 True True 154 -2.0 -1.2349999999999994
agent_timesteps_total: 31108
custom_metrics: {}
date: 2021-12-09_02-36-27
done: false
episode_len_mean: 160.35051546391753
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.40880412371135344
episode_reward_min: -2.9400000000000004
episodes_this_iter: 3
episodes_total: 97
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.647622108459473
        entropy_coeff: 0.0
        kl: 0.01534600555896759
        model: {}
        policy_loss: -0.13274747133255005
  

blue_1DOWN
LOSE
blue_0 True True 94 0.005 0.4700000000000003
blue_1 True True 94 -2.0 -1.5349999999999997
agent_timesteps_total: 32846
custom_metrics: {}
date: 2021-12-09_02-39-06
done: false
episode_len_mean: 160.06
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.41149000000001307
episode_reward_min: -2.9400000000000004
episodes_this_iter: 2
episodes_total: 103
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.59394359588623
        entropy_coeff: 0.0
        kl: 0.01484588347375393
        model: {}
        policy_loss: -0.10387764126062393
        total_loss: -0.0011894300114363432
        vf_explained_var: -0.18023373186588287
        vf_loss: 0.08765673637390137
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
    

blue_1DOWN
LOSE
blue_0 True True 171 0.005 0.8550000000000006
blue_1 True True 171 -2.0 -1.1499999999999995
blue_1DOWN
LOSE
blue_0 True True 87 0.005 0.4350000000000003
blue_1 True True 87 -2.0 -1.5699999999999998
agent_timesteps_total: 35330
custom_metrics: {}
date: 2021-12-09_02-42-31
done: false
episode_len_mean: 161.34
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.40104000000001333
episode_reward_min: -2.9400000000000004
episodes_this_iter: 2
episodes_total: 111
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.499217987060547
        entropy_coeff: 0.0
        kl: 0.014788059517741203
        model: {}
        policy_loss: -0.08298910409212112
        total_loss: 0.07937650382518768
        vf_explained_var: 0.2549959719181061
        vf_loss: 0.1473926901817321

blue_0DOWN
LOSE
blue_0 True True 176 -2.0 -1.1249999999999993
blue_1 True True 176 0.005 0.8800000000000007
blue_1DOWN
LOSE
blue_0 True True 438 0.005 2.1899999999999755
blue_1 True True 438 -2.001 0.29399999999998805
agent_timesteps_total: 37946
custom_metrics: {}
date: 2021-12-09_02-46-04
done: false
episode_len_mean: 165.07
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.3626500000000143
episode_reward_min: -2.9400000000000004
episodes_this_iter: 2
episodes_total: 118
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.974084854125977
        entropy_coeff: 0.0
        kl: 0.01594521850347519
        model: {}
        policy_loss: -0.013280787505209446
        total_loss: 0.42634615302085876
        vf_explained_var: -0.34951403737068176
        vf_loss: 0.42348241806

agent_timesteps_total: 39626
custom_metrics: {}
date: 2021-12-09_02-48-39
done: false
episode_len_mean: 164.23
episode_media: {}
episode_reward_max: 4.026999999999897
episode_reward_mean: -0.37128000000001443
episode_reward_min: -2.9400000000000004
episodes_this_iter: 2
episodes_total: 124
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.569992065429688
        entropy_coeff: 0.0
        kl: 0.013209039345383644
        model: {}
        policy_loss: -0.2599874436855316
        total_loss: 0.1690317690372467
        vf_explained_var: 0.12981559336185455
        vf_loss: 0.4156450629234314
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 8.23942756652832
        entropy_coeff: 0.0
        kl: 0.01

blue_1DOWN
LOSE
blue_0 True True 162 0.005 0.8100000000000006
blue_1 True True 162 -2.0 -1.1949999999999994
blue_0DOWN
LOSE
blue_0 True True 93 -2.0 -1.5399999999999996
blue_1 True True 93 0.005 0.4650000000000003
blue_0DOWN
LOSE
blue_0 True True 123 -2.0 -1.3899999999999997
blue_1 True True 123 0.005 0.6150000000000004
agent_timesteps_total: 43158
custom_metrics: {}
date: 2021-12-09_02-52-50
done: false
episode_len_mean: 169.02
episode_media: {}
episode_reward_max: 4.145999999999909
episode_reward_mean: -0.32496000000001496
episode_reward_min: -2.9400000000000004
episodes_this_iter: 3
episodes_total: 131
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.0125000476837158
        cur_lr: 4.999999873689376e-05
        entropy: 7.937071323394775
        entropy_coeff: 0.0
        kl: 0.01536607462912798
        model: {}
        policy_loss: -0.3995071351528168
        total_

blue_0DOWN
LOSE
blue_0 True True 120 -2.0 -1.4049999999999996
blue_1 True True 120 0.005 0.6000000000000004
LOSE
blue_0 False True 449 -0.995 1.2649999999999744
blue_1 False True 449 -0.993 1.579999999999993
agent_timesteps_total: 45830
custom_metrics: {}
date: 2021-12-09_02-56-13
done: false
episode_len_mean: 175.0
episode_media: {}
episode_reward_max: 4.145999999999909
episode_reward_mean: -0.26156000000001617
episode_reward_min: -2.9400000000000004
episodes_this_iter: 2
episodes_total: 137
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.785264015197754
        entropy_coeff: 0.0
        kl: 0.013143614865839481
        model: {}
        policy_loss: -0.1856577843427658
        total_loss: 0.17618057131767273
        vf_explained_var: -0.45084697008132935
        vf_loss: 0.3418765366077423
    b

agent_timesteps_total: 49062
custom_metrics: {}
date: 2021-12-09_02-59-58
done: false
episode_len_mean: 179.77
episode_media: {}
episode_reward_max: 4.145999999999909
episode_reward_mean: -0.21353000000001743
episode_reward_min: -2.9400000000000004
episodes_this_iter: 1
episodes_total: 143
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.675944328308105
        entropy_coeff: 0.0
        kl: 0.008343678899109364
        model: {}
        policy_loss: -0.3055329918861389
        total_loss: -0.20895807445049286
        vf_explained_var: -1.0
        vf_loss: 0.0839029848575592
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.068110466003418
        entropy_coeff: 0.0
        kl: 0.00651830108836

blue_0DOWN
LOSE
blue_0 True True 196 -2.0 -1.0249999999999992
blue_1 True True 196 0.005 0.9800000000000008
blue_1DOWN
LOSE
blue_0 True True 340 0.005 1.7399999999999878
blue_1 True True 340 -2.0 -0.24800000000001332
agent_timesteps_total: 52024
custom_metrics: {}
date: 2021-12-09_03-03-40
done: false
episode_len_mean: 179.32
episode_media: {}
episode_reward_max: 4.145999999999909
episode_reward_mean: -0.22052000000001723
episode_reward_min: -2.9400000000000004
episodes_this_iter: 2
episodes_total: 150
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.159573554992676
        entropy_coeff: 0.0
        kl: 0.013417662121355534
        model: {}
        policy_loss: -0.07394915074110031
        total_loss: 0.17489632964134216
        vf_explained_var: -0.14818374812602997
        vf_loss: 0.22846741974

blue_1DOWN
LOSE
blue_0 True True 216 0.005 1.079999999999999
blue_1 True True 216 -2.0 -0.9250000000000009
blue_1DOWN
LOSE
blue_0 True True 176 0.005 0.8800000000000007
blue_1 True True 176 -2.0 -1.1249999999999993
agent_timesteps_total: 54432
custom_metrics: {}
date: 2021-12-09_03-07-09
done: false
episode_len_mean: 182.03
episode_media: {}
episode_reward_max: 4.145999999999909
episode_reward_mean: -0.19351000000001772
episode_reward_min: -2.9400000000000004
episodes_this_iter: 2
episodes_total: 157
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.785709381103516
        entropy_coeff: 0.0
        kl: 0.01328942272812128
        model: {}
        policy_loss: -0.09134615957736969
        total_loss: -0.0390811525285244
        vf_explained_var: -0.20146313309669495
        vf_loss: 0.03208170831203

blue_1DOWN
LOSE
blue_0 True True 378 0.005 1.8899999999999817
blue_1 True True 378 -2.0 -0.04100000000001813
agent_timesteps_total: 57618
custom_metrics: {}
date: 2021-12-09_03-10-56
done: false
episode_len_mean: 186.97
episode_media: {}
episode_reward_max: 4.145999999999909
episode_reward_mean: -0.1416700000000185
episode_reward_min: -2.9400000000000004
episodes_this_iter: 1
episodes_total: 163
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.974848747253418
        entropy_coeff: 0.0
        kl: 0.009504171088337898
        model: {}
        policy_loss: -0.22564543783664703
        total_loss: -0.18846264481544495
        vf_explained_var: -1.0
        vf_loss: 0.022748315706849098
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4

agent_timesteps_total: 59904
custom_metrics: {}
date: 2021-12-09_03-14-17
done: false
episode_len_mean: 187.22
episode_media: {}
episode_reward_max: 4.145999999999909
episode_reward_mean: -0.12155000000001852
episode_reward_min: -1.745
episodes_this_iter: 3
episodes_total: 171
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.089229583740234
        entropy_coeff: 0.0
        kl: 0.016664696857333183
        model: {}
        policy_loss: -0.234964981675148
        total_loss: 0.1811337172985077
        vf_explained_var: 0.02217433787882328
        vf_loss: 0.3907892107963562
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 7.242575645446777
        entropy_coeff: 0.0
        kl: 0.012510290369391

LOSE
blue_0 False False 600 -0.996 2.0039999999999734
blue_1 False False 600 -0.995 2.2999999999999603
agent_timesteps_total: 62370
custom_metrics: {}
date: 2021-12-09_03-17-05
done: false
episode_len_mean: 190.99
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: -0.08076000000001958
episode_reward_min: -1.745
episodes_this_iter: 1
episodes_total: 176
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 7.943665981292725
        entropy_coeff: 0.0
        kl: 0.008981694467365742
        model: {}
        policy_loss: -0.38648149371147156
        total_loss: -0.3143952786922455
        vf_explained_var: -0.857554018497467
        vf_loss: 0.0584452785551548
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.7593749761581421
        cur_lr: 4.999999

blue_1DOWN
LOSE
blue_0 True True 141 0.005 0.7050000000000005
blue_1 True True 141 -2.0 -1.2999999999999994
blue_1DOWN
LOSE
blue_0 True True 135 0.005 0.6750000000000005
blue_1 True True 135 -2.0 -1.3299999999999996
agent_timesteps_total: 65092
custom_metrics: {}
date: 2021-12-09_03-21-04
done: false
episode_len_mean: 191.0
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: -0.07826000000001943
episode_reward_min: -1.745
episodes_this_iter: 2
episodes_total: 185
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 7.969242572784424
        entropy_coeff: 0.0
        kl: 0.014594229869544506
        model: {}
        policy_loss: -0.23219749331474304
        total_loss: -0.19069026410579681
        vf_explained_var: 0.6924189925193787
        vf_loss: 0.019342225044965744
    blue

blue_1DOWN
LOSE
blue_0 True True 116 0.005 0.5800000000000004
blue_1 True True 116 -2.0 -1.4249999999999996
blue_1DOWN
LOSE
blue_0 True True 117 0.005 0.5850000000000004
blue_1 True True 117 -2.0 -1.4199999999999995
blue_0DOWN
LOSE
blue_0 True True 360 -2.0 -0.20500000000001628
blue_1 True True 360 0.024 2.5909999999999944
agent_timesteps_total: 68266
custom_metrics: {}
date: 2021-12-09_03-25-28
done: false
episode_len_mean: 188.9
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: -0.09126000000001859
episode_reward_min: -1.665
episodes_this_iter: 3
episodes_total: 194
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.091401100158691
        entropy_coeff: 0.0
        kl: 0.006840743590146303
        model: {}
        policy_loss: -0.38585183024406433
        total_loss: -0.

blue_0DOWN
LOSE
blue_0 True True 239 -2.0 -0.8100000000000034
blue_1 True True 239 0.005 1.1949999999999965
blue_1DOWN
LOSE
blue_0 True True 200 0.005 1.0000000000000007
blue_1 True True 200 -2.0 -1.0049999999999992
agent_timesteps_total: 70510
custom_metrics: {}
date: 2021-12-09_03-28-36
done: false
episode_len_mean: 192.85
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: -0.05148000000001937
episode_reward_min: -1.665
episodes_this_iter: 2
episodes_total: 200
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.625246047973633
        entropy_coeff: 0.0
        kl: 0.016484232619404793
        model: {}
        policy_loss: -0.032557412981987
        total_loss: 0.3729827105998993
        vf_explained_var: 0.21470732986927032
        vf_loss: 0.38050469756126404
    blue_1:

blue_0DOWN
LOSE
blue_0 True True 260 -2.0 -0.7050000000000056
blue_1 True True 260 0.005 1.2999999999999943
agent_timesteps_total: 72962
custom_metrics: {}
date: 2021-12-09_03-31-38
done: false
episode_len_mean: 196.31
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: -0.016620000000020122
episode_reward_min: -1.665
episodes_this_iter: 1
episodes_total: 205
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.697426795959473
        entropy_coeff: 0.0
        kl: 0.013994663953781128
        model: {}
        policy_loss: -0.13098792731761932
        total_loss: 0.03429168835282326
        vf_explained_var: 0.15751788020133972
        vf_loss: 0.14402523636817932
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4

blue_1DOWN
LOSE
blue_0 True True 95 0.005 0.4750000000000003
blue_1 True True 95 -2.0 -1.5299999999999998
blue_0DOWN
LOSE
blue_0 True True 189 -2.0 -1.0599999999999992
blue_1 True True 189 0.005 0.9450000000000007
agent_timesteps_total: 75986
custom_metrics: {}
date: 2021-12-09_03-35-32
done: false
episode_len_mean: 199.44
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: 0.014609999999978958
episode_reward_min: -1.665
episodes_this_iter: 2
episodes_total: 213
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.357046127319336
        entropy_coeff: 0.0
        kl: 0.010001135990023613
        model: {}
        policy_loss: -0.2833748161792755
        total_loss: -0.0508008636534214
        vf_explained_var: -0.3788657486438751
        vf_loss: 0.21738477051258087
    blue_1:

blue_1DOWN
LOSE
blue_0 True True 61 0.005 0.30500000000000016
blue_1 True True 61 -2.0 -1.6999999999999997
blue_1DOWN
LOSE
blue_0 True True 134 0.005 0.6700000000000005
blue_1 True True 134 -2.0 -1.3349999999999995
blue_0DOWN
LOSE
blue_0 True True 204 -2.0 -0.9849999999999997
blue_1 True True 204 0.004 1.0040000000000007
agent_timesteps_total: 79568
custom_metrics: {}
date: 2021-12-09_03-39-41
done: false
episode_len_mean: 205.37
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: 0.07652999999997782
episode_reward_min: -1.665
episodes_this_iter: 3
episodes_total: 219
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.500276565551758
        entropy_coeff: 0.0
        kl: 0.010232334025204182
        model: {}
        policy_loss: -0.16778361797332764
        total_loss: 0.036

blue_1DOWN
LOSE
blue_0 True True 596 0.005 2.9799999999999587
blue_1 True True 596 -1.998 1.0089999999999584
agent_timesteps_total: 83562
custom_metrics: {}
date: 2021-12-09_03-44-07
done: false
episode_len_mean: 217.85
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: 0.20161999999997562
episode_reward_min: -1.6249999999999998
episodes_this_iter: 1
episodes_total: 225
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.808180809020996
        entropy_coeff: 0.0
        kl: 0.011492257006466389
        model: {}
        policy_loss: -0.3537748157978058
        total_loss: -0.3120836317539215
        vf_explained_var: -1.0
        vf_loss: 0.024237295612692833
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2814452648162842
        cur_lr: 4.9

LOSE
blue_0 False True 417 -0.995 1.0849999999999778
blue_1 False True 417 -0.976 1.7139999999999924
agent_timesteps_total: 87258
custom_metrics: {}
date: 2021-12-09_03-48-24
done: false
episode_len_mean: 220.5
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: 0.23283999999997484
episode_reward_min: -1.6249999999999998
episodes_this_iter: 1
episodes_total: 231
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.103047370910645
        entropy_coeff: 0.0
        kl: 0.0111727649345994
        model: {}
        policy_loss: -0.2657844126224518
        total_loss: -0.1771230548620224
        vf_explained_var: -0.4140321612358093
        vf_loss: 0.07169270515441895
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2814452648162842
        cur_lr: 

blue_0DOWN
LOSE
blue_0 True True 47 -2.0 -1.7699999999999998
blue_1 True True 47 0.005 0.23500000000000013
blue_0DOWN
LOSE
blue_0 True True 244 -2.0 -0.7850000000000039
blue_1 True True 244 0.005 1.219999999999996
agent_timesteps_total: 90370
custom_metrics: {}
date: 2021-12-09_03-52-16
done: false
episode_len_mean: 222.7
episode_media: {}
episode_reward_max: 4.303999999999868
episode_reward_mean: 0.2548599999999738
episode_reward_min: -1.6249999999999998
episodes_this_iter: 2
episodes_total: 237
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 7.944298267364502
        entropy_coeff: 0.0
        kl: 0.012993530370295048
        model: {}
        policy_loss: -0.2632477283477783
        total_loss: 0.3235565423965454
        vf_explained_var: 0.14146679639816284
        vf_loss: 0.5670703053474426
   

LOSE
blue_0 False False 600 -0.995 1.9999999999999583
blue_1 False False 600 -0.995 2.50099999999996
agent_timesteps_total: 94154
custom_metrics: {}
date: 2021-12-09_03-56-44
done: false
episode_len_mean: 225.46
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.2879999999999733
episode_reward_min: -1.6249999999999998
episodes_this_iter: 1
episodes_total: 243
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5187499523162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.788362503051758
        entropy_coeff: 0.0
        kl: 0.004508101847022772
        model: {}
        policy_loss: -0.3826887011528015
        total_loss: -0.369390070438385
        vf_explained_var: -0.5869614481925964
        vf_loss: 0.006451974622905254
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2814452648162842
        cur_lr

blue_1DOWN
LOSE
blue_0 True True 98 0.005 0.4900000000000003
blue_1 True True 98 -2.0 -1.5149999999999997
LOSE
blue_0 False False 600 -0.995 1.9999999999999583
blue_1 False False 600 -0.995 2.029999999999958
agent_timesteps_total: 97272
custom_metrics: {}
date: 2021-12-09_04-00-37
done: false
episode_len_mean: 229.64
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.3305899999999726
episode_reward_min: -1.6249999999999998
episodes_this_iter: 2
episodes_total: 249
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.139062523841858
        cur_lr: 4.999999873689376e-05
        entropy: 6.983354568481445
        entropy_coeff: 0.0
        kl: 0.012005869299173355
        model: {}
        policy_loss: -0.2831079959869385
        total_loss: -0.2261706292629242
        vf_explained_var: -0.5839776396751404
        vf_loss: 0.04326190426945686
    blu

agent_timesteps_total: 99836
custom_metrics: {}
date: 2021-12-09_04-04-04
done: false
episode_len_mean: 230.94
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.3426399999999721
episode_reward_min: -1.6249999999999998
episodes_this_iter: 2
episodes_total: 255
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.139062523841858
        cur_lr: 4.999999873689376e-05
        entropy: 8.088632583618164
        entropy_coeff: 0.0
        kl: 0.00770656717941165
        model: {}
        policy_loss: -0.348425030708313
        total_loss: -0.2440631240606308
        vf_explained_var: 0.39506733417510986
        vf_loss: 0.09558363258838654
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2814452648162842
        cur_lr: 4.999999873689376e-05
        entropy: 8.73184585571289
        entropy_coeff: 0.0
        kl: 0.01358

blue_1DOWN
LOSE
blue_0 True True 178 0.005 0.8900000000000007
blue_1 True True 178 -2.0 -1.1149999999999993
blue_0DOWN
LOSE
blue_0 True True 282 -2.0 -0.595000000000008
blue_1 True True 282 0.007 1.423999999999992
agent_timesteps_total: 103724
custom_metrics: {}
date: 2021-12-09_04-08-30
done: false
episode_len_mean: 234.31
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.3793999999999713
episode_reward_min: -1.6249999999999998
episodes_this_iter: 2
episodes_total: 262
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.139062523841858
        cur_lr: 4.999999873689376e-05
        entropy: 8.52883529663086
        entropy_coeff: 0.0
        kl: 0.008502266369760036
        model: {}
        policy_loss: -0.42224371433258057
        total_loss: -0.3513264060020447
        vf_explained_var: 0.1879502385854721
        vf_loss: 0.06123265251517296
 

blue_0DOWN
LOSE
blue_0 True True 101 -2.0 -1.4999999999999996
blue_1 True True 101 0.005 0.5050000000000003
blue_0DOWN
LOSE
blue_0 True True 100 -2.0 -1.5049999999999997
blue_1 True True 100 0.005 0.5000000000000003
LOSE
blue_0 False False 600 -0.995 1.9999999999999583
blue_1 False False 600 -0.995 2.069999999999958
agent_timesteps_total: 107362
custom_metrics: {}
date: 2021-12-09_04-12-59
done: false
episode_len_mean: 239.16
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.43159999999997056
episode_reward_min: -1.6249999999999998
episodes_this_iter: 3
episodes_total: 270
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.097107887268066
        entropy_coeff: 0.0
        kl: 0.016832930967211723
        model: {}
        policy_loss: -0.11218605935573578
        total_los

blue_1DOWN
LOSE
blue_0 True True 566 0.005 2.829999999999962
blue_1 True True 566 -2.0 0.884999999999962
agent_timesteps_total: 110902
custom_metrics: {}
date: 2021-12-09_04-17-07
done: false
episode_len_mean: 242.66
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.4691799999999706
episode_reward_min: -1.6249999999999998
episodes_this_iter: 2
episodes_total: 276
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.466178894042969
        entropy_coeff: 0.0
        kl: 0.012111833319067955
        model: {}
        policy_loss: -0.08757425844669342
        total_loss: 0.17406272888183594
        vf_explained_var: -0.39144906401634216
        vf_loss: 0.2409427911043167
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2814452648162842
        c

blue_1DOWN
LOSE
blue_0 True True 196 0.005 0.9800000000000008
blue_1 True True 196 -2.0 -1.0249999999999992
blue_1DOWN
LOSE
blue_0 True True 56 0.005 0.28000000000000014
blue_1 True True 56 -2.0 -1.7249999999999999
blue_0DOWN
LOSE
blue_0 True True 127 -2.0 -1.3699999999999997
blue_1 True True 127 0.005 0.6350000000000005
agent_timesteps_total: 113886
custom_metrics: {}
date: 2021-12-09_04-21-19
done: false
episode_len_mean: 243.97
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.48072999999997035
episode_reward_min: -1.6249999999999998
episodes_this_iter: 3
episodes_total: 285
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.124265670776367
        entropy_coeff: 0.0
        kl: 0.009304244071245193
        model: {}
        policy_loss: -0.671421468257904
        total_

blue_1DOWN
LOSE
blue_0 True True 243 0.005 1.214999999999996
blue_1 True True 243 -2.0 -0.7900000000000038
blue_1DOWN
LOSE
blue_0 True True 321 0.005 1.6049999999999878
blue_1 True True 321 -2.0 -0.4300000000000088
agent_timesteps_total: 116118
custom_metrics: {}
date: 2021-12-09_04-24-00
done: false
episode_len_mean: 247.29
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.5136299999999696
episode_reward_min: -1.6249999999999998
episodes_this_iter: 2
episodes_total: 290
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.064812660217285
        entropy_coeff: 0.0
        kl: 0.018664229661226273
        model: {}
        policy_loss: -0.062953881919384
        total_loss: 0.006846689153462648
        vf_explained_var: -0.5202711820602417
        vf_loss: 0.03791099414229393

blue_0DOWN
LOSE
blue_0 True True 293 -2.0 -0.5400000000000091
blue_1 True True 293 0.005 1.4489999999999925
agent_timesteps_total: 118948
custom_metrics: {}
date: 2021-12-09_04-27-29
done: false
episode_len_mean: 252.83
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.5673899999999688
episode_reward_min: -1.6249999999999998
episodes_this_iter: 1
episodes_total: 295
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.268702507019043
        entropy_coeff: 0.0
        kl: 0.007259249687194824
        model: {}
        policy_loss: -0.3361699879169464
        total_loss: -0.1546652764081955
        vf_explained_var: -0.5109124779701233
        vf_loss: 0.16910164058208466
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.9221680164337158
       

blue_1DOWN
LOSE
blue_0 True True 158 0.005 0.7900000000000006
blue_1 True True 158 -2.0 -1.2149999999999994
blue_1DOWN
LOSE
blue_0 True True 596 0.005 2.9799999999999587
blue_1 True True 596 -2.0 1.0069999999999588
agent_timesteps_total: 122558
custom_metrics: {}
date: 2021-12-09_04-31-50
done: false
episode_len_mean: 255.05
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.5927099999999682
episode_reward_min: -1.6249999999999998
episodes_this_iter: 2
episodes_total: 302
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 7.400657653808594
        entropy_coeff: 0.0
        kl: 0.019308585673570633
        model: {}
        policy_loss: 0.008249957114458084
        total_loss: 0.08649646490812302
        vf_explained_var: -0.5204455852508545
        vf_loss: 0.0452559813857078

blue_1DOWN
LOSE
blue_0 True True 158 0.005 0.7900000000000006
blue_1 True True 158 -2.0 -1.2149999999999994
blue_1DOWN
LOSE
blue_0 True True 402 0.005 2.0099999999999794
blue_1 True True 402 -1.998 0.07399999999998319
agent_timesteps_total: 125638
custom_metrics: {}
date: 2021-12-09_04-35-25
done: false
episode_len_mean: 259.48
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.6379099999999672
episode_reward_min: -1.6249999999999998
episodes_this_iter: 2
episodes_total: 307
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.046490669250488
        entropy_coeff: 0.0
        kl: 0.012560492381453514
        model: {}
        policy_loss: -0.0889715701341629
        total_loss: 0.022559847682714462
        vf_explained_var: -0.28001177310943604
        vf_loss: 0.090070642530

agent_timesteps_total: 127850
custom_metrics: {}
date: 2021-12-09_04-38-22
done: false
episode_len_mean: 259.32
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.6366599999999677
episode_reward_min: -1.5349999999999997
episodes_this_iter: 2
episodes_total: 313
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.080831527709961
        entropy_coeff: 0.0
        kl: 0.011028318665921688
        model: {}
        policy_loss: -0.2562921941280365
        total_loss: -0.007282448932528496
        vf_explained_var: -0.337415486574173
        vf_loss: 0.23016683757305145
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.441625952720642
        cur_lr: 4.999999873689376e-05
        entropy: 7.923246383666992
        entropy_coeff: 0.0
        kl: 0.0

blue_0DOWN
LOSE
blue_0 True True 109 -2.0 -1.4599999999999995
blue_1 True True 109 0.005 0.5450000000000004
blue_1DOWN
LOSE
blue_0 True True 460 0.005 2.299999999999973
blue_1 True True 460 -2.0 0.3309999999999724
agent_timesteps_total: 131600
custom_metrics: {}
date: 2021-12-09_04-42-44
done: false
episode_len_mean: 257.8
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.6242499999999681
episode_reward_min: -1.5349999999999997
episodes_this_iter: 2
episodes_total: 320
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 7.658992290496826
        entropy_coeff: 0.0
        kl: 0.01572975516319275
        model: {}
        policy_loss: -0.0968562588095665
        total_loss: 0.1649019718170166
        vf_explained_var: -0.5548828840255737
        vf_loss: 0.23488245904445648
   

blue_0DOWN
LOSE
blue_0 True True 72 -2.0 -1.6449999999999998
blue_1 True True 72 0.005 0.3600000000000002
blue_0DOWN
LOSE
blue_0 True True 159 -2.0 -1.2099999999999995
blue_1 True True 159 0.005 0.7950000000000006
blue_1DOWN
LOSE
blue_0 True True 102 0.005 0.5100000000000003
blue_1 True True 102 -2.0 -1.4949999999999997
agent_timesteps_total: 134076
custom_metrics: {}
date: 2021-12-09_04-46-02
done: false
episode_len_mean: 251.16
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.5554299999999696
episode_reward_min: -1.5349999999999997
episodes_this_iter: 3
episodes_total: 326
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 7.78400993347168
        entropy_coeff: 0.0
        kl: 0.01265870314091444
        model: {}
        policy_loss: 0.3166790008544922
        total_loss

blue_1DOWN
LOSE
blue_0 True True 89 0.005 0.4450000000000003
blue_1 True True 89 -2.0 -1.5599999999999996
LOSE
blue_0 False True 453 -0.995 1.264999999999974
blue_1 False True 453 -0.993 1.5199999999999938
agent_timesteps_total: 137952
custom_metrics: {}
date: 2021-12-09_04-50-37
done: false
episode_len_mean: 247.74
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.5170799999999709
episode_reward_min: -1.5349999999999997
episodes_this_iter: 2
episodes_total: 332
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 7.899351119995117
        entropy_coeff: 0.0
        kl: 0.008404167369008064
        model: {}
        policy_loss: -0.22350244224071503
        total_loss: -0.15626958012580872
        vf_explained_var: -0.3232928514480591
        vf_loss: 0.052873533219099045
    b

blue_0DOWN
LOSE
blue_0 True True 226 -2.0 -0.875000000000002
blue_1 True True 226 0.005 1.1399999999999986
blue_1DOWN
LOSE
blue_0 True True 133 0.005 0.6650000000000005
blue_1 True True 133 -2.0 -1.3399999999999994
agent_timesteps_total: 142022
custom_metrics: {}
date: 2021-12-09_04-55-17
done: false
episode_len_mean: 255.46
episode_media: {}
episode_reward_max: 4.500999999999903
episode_reward_mean: 0.5927499999999689
episode_reward_min: -1.4949999999999997
episodes_this_iter: 2
episodes_total: 339
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 6.341311931610107
        entropy_coeff: 0.0
        kl: 0.011803094297647476
        model: {}
        policy_loss: 0.28550732135772705
        total_loss: 0.5072978734970093
        vf_explained_var: 0.6677976250648499
        vf_loss: 0.20162388682365417
 

LOSE
blue_0 False True 359 -0.995 0.7949999999999838
blue_1 False True 359 -0.993 1.033999999999987
agent_timesteps_total: 144756
custom_metrics: {}
date: 2021-12-09_04-58-42
done: false
episode_len_mean: 247.47
episode_media: {}
episode_reward_max: 4.069999999999903
episode_reward_mean: 0.5090799999999708
episode_reward_min: -1.4949999999999997
episodes_this_iter: 3
episodes_total: 346
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.14013671875
        entropy_coeff: 0.0
        kl: 0.01760896109044552
        model: {}
        policy_loss: -0.10945994406938553
        total_loss: 0.24921827018260956
        vf_explained_var: -0.21663837134838104
        vf_loss: 0.3285916745662689
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.99

LOSE
blue_0 False True 310 -0.995 0.549999999999989
blue_1 False True 310 -0.993 0.8159999999999886
agent_timesteps_total: 147596
custom_metrics: {}
date: 2021-12-09_05-02-18
done: false
episode_len_mean: 247.49
episode_media: {}
episode_reward_max: 4.069999999999903
episode_reward_mean: 0.5136999999999707
episode_reward_min: -1.4449999999999996
episodes_this_iter: 1
episodes_total: 352
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.114702224731445
        entropy_coeff: 0.0
        kl: 0.006724438164383173
        model: {}
        policy_loss: -0.4372192323207855
        total_loss: -0.3885626792907715
        vf_explained_var: -0.17868542671203613
        vf_loss: 0.037167228758335114
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.912278950214386
        cur_lr

blue_1DOWN
LOSE
blue_0 True True 80 0.005 0.40000000000000024
blue_1 True True 80 -2.0 -1.6049999999999998
blue_0DOWN
LOSE
blue_0 True True 375 -2.0 -0.13000000000001788
blue_1 True True 375 0.004 1.8469999999999887
agent_timesteps_total: 151048
custom_metrics: {}
date: 2021-12-09_05-06-19
done: false
episode_len_mean: 246.66
episode_media: {}
episode_reward_max: 4.069999999999903
episode_reward_mean: 0.5046699999999709
episode_reward_min: -1.4449999999999996
episodes_this_iter: 2
episodes_total: 358
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.124970436096191
        entropy_coeff: 0.0
        kl: 0.006236369721591473
        model: {}
        policy_loss: -0.3868063688278198
        total_loss: -0.3294978141784668
        vf_explained_var: 0.04249412566423416
        vf_loss: 0.0466531440615654

blue_1DOWN
LOSE
blue_0 True True 82 0.005 0.41000000000000025
blue_1 True True 82 -2.0 -1.5949999999999998
blue_1DOWN
LOSE
blue_0 True True 101 0.005 0.5050000000000003
blue_1 True True 101 -2.0 -1.4999999999999996
blue_1DOWN
LOSE
blue_0 True True 116 0.005 0.5800000000000004
blue_1 True True 116 -2.0 -1.4249999999999996
agent_timesteps_total: 154614
custom_metrics: {}
date: 2021-12-09_05-10-37
done: false
episode_len_mean: 250.15
episode_media: {}
episode_reward_max: 4.069999999999903
episode_reward_mean: 0.5422399999999703
episode_reward_min: -1.4449999999999996
episodes_this_iter: 3
episodes_total: 365
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 7.134803295135498
        entropy_coeff: 0.0
        kl: 0.01242332998663187
        model: {}
        policy_loss: -0.2591117322444916
        total_l

blue_0DOWN
LOSE
blue_0 True True 113 -2.0 -1.4399999999999995
blue_1 True True 113 0.005 0.5650000000000004
blue_0DOWN
LOSE
blue_0 True True 549 -2.0 0.7399999999999638
blue_1 True True 549 0.005 2.8039999999999643
agent_timesteps_total: 158806
custom_metrics: {}
date: 2021-12-09_05-15-10
done: false
episode_len_mean: 255.51
episode_media: {}
episode_reward_max: 4.041999999999899
episode_reward_mean: 0.5925199999999688
episode_reward_min: -1.4449999999999996
episodes_this_iter: 2
episodes_total: 371
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 7.222594261169434
        entropy_coeff: 0.0
        kl: 0.012309655547142029
        model: {}
        policy_loss: -0.15992164611816406
        total_loss: 0.177594393491745
        vf_explained_var: -0.3489828407764435
        vf_loss: 0.31648385524749756


blue_1DOWN
LOSE
blue_0 True True 64 0.005 0.3200000000000002
blue_1 True True 64 -2.0 -1.6849999999999998
blue_1DOWN
LOSE
blue_0 True True 231 0.005 1.1549999999999974
blue_1 True True 231 -2.001 -0.8509999999999991
agent_timesteps_total: 161000
custom_metrics: {}
date: 2021-12-09_05-17-58
done: false
episode_len_mean: 250.49
episode_media: {}
episode_reward_max: 4.041999999999899
episode_reward_mean: 0.5439399999999694
episode_reward_min: -1.4449999999999996
episodes_this_iter: 2
episodes_total: 376
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.00387954711914
        entropy_coeff: 0.0
        kl: 0.018078971654176712
        model: {}
        policy_loss: -0.06730633229017258
        total_loss: 0.0010339011205360293
        vf_explained_var: -0.2317465841770172
        vf_loss: 0.03745062276721

agent_timesteps_total: 162864
custom_metrics: {}
date: 2021-12-09_05-20-42
done: false
episode_len_mean: 248.68
episode_media: {}
episode_reward_max: 4.041999999999899
episode_reward_mean: 0.5245999999999694
episode_reward_min: -1.5949999999999998
episodes_this_iter: 1
episodes_total: 382
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.432516098022461
        entropy_coeff: 0.0
        kl: 0.017274873331189156
        model: {}
        policy_loss: -0.08451561629772186
        total_loss: 0.12663300335407257
        vf_explained_var: -0.651197612285614
        vf_loss: 0.1816328912973404
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.3684184551239014
        cur_lr: 4.999999873689376e-05
        entropy: 7.679295063018799
        entropy_coeff: 0.0
        kl: 0.01

blue_0DOWN
LOSE
blue_0 True True 220 -2.0 -0.9050000000000014
blue_1 True True 220 0.005 1.133999999999998
blue_1DOWN
LOSE
blue_0 True True 81 0.005 0.40500000000000025
blue_1 True True 81 -2.0 -1.5999999999999996
agent_timesteps_total: 165786
custom_metrics: {}
date: 2021-12-09_05-24-26
done: false
episode_len_mean: 251.55
episode_media: {}
episode_reward_max: 4.041999999999899
episode_reward_mean: 0.5565399999999694
episode_reward_min: -1.5949999999999998
episodes_this_iter: 2
episodes_total: 389
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 8.116236686706543
        entropy_coeff: 0.0
        kl: 0.016636788845062256
        model: {}
        policy_loss: 0.03921627998352051
        total_loss: 0.2834567129611969
        vf_explained_var: 0.32302871346473694
        vf_loss: 0.21581488847732544
 

blue_1DOWN
LOSE
blue_0 True True 332 0.005 1.6599999999999866
blue_1 True True 332 -1.999 -0.34600000000000875
agent_timesteps_total: 167728
custom_metrics: {}
date: 2021-12-09_05-26-49
done: false
episode_len_mean: 251.05
episode_media: {}
episode_reward_max: 4.041999999999899
episode_reward_mean: 0.5488199999999693
episode_reward_min: -1.5949999999999998
episodes_this_iter: 1
episodes_total: 393
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 7.8133463859558105
        entropy_coeff: 0.0
        kl: 0.016735954210162163
        model: {}
        policy_loss: -0.175999253988266
        total_loss: -0.0710650160908699
        vf_explained_var: -1.0
        vf_loss: 0.07633931934833527
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 2.0526275634765625
        cur_lr: 4.9

blue_0DOWN
LOSE
blue_0 True True 133 -2.0 -1.3399999999999994
blue_1 True True 133 0.005 0.6650000000000005
blue_0DOWN
LOSE
blue_0 True True 71 -2.0 -1.65
blue_1 True True 71 0.005 0.3550000000000002
blue_0DOWN
LOSE
blue_0 True True 52 -2.0 -1.7449999999999999
blue_1 True True 52 0.005 0.2600000000000001
agent_timesteps_total: 170984
custom_metrics: {}
date: 2021-12-09_05-31-00
done: false
episode_len_mean: 242.13
episode_media: {}
episode_reward_max: 4.041999999999899
episode_reward_mean: 0.4520499999999709
episode_reward_min: -1.5949999999999998
episodes_this_iter: 3
episodes_total: 402
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.708593726158142
        cur_lr: 4.999999873689376e-05
        entropy: 6.30948543548584
        entropy_coeff: 0.0
        kl: 0.017203493043780327
        model: {}
        policy_loss: -0.10310036689043045
        total_loss: 0.767826914

blue_1DOWN
LOSE
blue_0 True True 593 0.005 2.964999999999959
blue_1 True True 593 -1.999 1.001999999999958
agent_timesteps_total: 175032
custom_metrics: {}
date: 2021-12-09_05-35-12
done: false
episode_len_mean: 250.99
episode_media: {}
episode_reward_max: 4.027999999999878
episode_reward_mean: 0.5410099999999691
episode_reward_min: -1.5949999999999998
episodes_this_iter: 1
episodes_total: 406
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.854296863079071
        cur_lr: 4.999999873689376e-05
        entropy: 8.970685005187988
        entropy_coeff: 0.0
        kl: 0.013545670546591282
        model: {}
        policy_loss: -0.26760751008987427
        total_loss: -0.25166305899620056
        vf_explained_var: -0.870343029499054
        vf_loss: 0.004372436087578535
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.1546030044555664
      

blue_0DOWN
LOSE
blue_0 True True 73 -2.0 -1.6399999999999997
blue_1 True True 73 0.005 0.3650000000000002
blue_0DOWN
LOSE
blue_0 True True 151 -2.0 -1.2499999999999996
blue_1 True True 151 0.005 0.7550000000000006
blue_0DOWN
LOSE
blue_0 True True 57 -2.0 -1.7199999999999998
blue_1 True True 57 0.005 0.28500000000000014
agent_timesteps_total: 177244
custom_metrics: {}
date: 2021-12-09_05-38-07
done: false
episode_len_mean: 248.88
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.5527299999999697
episode_reward_min: -1.5949999999999998
episodes_this_iter: 3
episodes_total: 412
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2814452648162842
        cur_lr: 4.999999873689376e-05
        entropy: 6.261061668395996
        entropy_coeff: 0.0
        kl: 0.019026458263397217
        model: {}
        policy_loss: -0.19157372415065765
        total_

LOSE
blue_0 False True 420 -0.995 1.0999999999999774
blue_1 False True 420 -0.993 1.4199999999999906
agent_timesteps_total: 180528
custom_metrics: {}
date: 2021-12-09_05-41-54
done: false
episode_len_mean: 250.33
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.5639399999999695
episode_reward_min: -1.5949999999999998
episodes_this_iter: 2
episodes_total: 418
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2814452648162842
        cur_lr: 4.999999873689376e-05
        entropy: 7.140296459197998
        entropy_coeff: 0.0
        kl: 0.012738869525492191
        model: {}
        policy_loss: -0.3576769530773163
        total_loss: -0.31736478209495544
        vf_explained_var: -0.4006914794445038
        vf_loss: 0.023987986147403717
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.7319045066833496
        cur

blue_0DOWN
LOSE
blue_0 True True 363 -2.0 -0.1900000000000166
blue_1 True True 363 0.005 1.8319999999999848
agent_timesteps_total: 183854
custom_metrics: {}
date: 2021-12-09_05-45-45
done: false
episode_len_mean: 252.22
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.5857299999999692
episode_reward_min: -1.5949999999999998
episodes_this_iter: 1
episodes_total: 423
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2814452648162842
        cur_lr: 4.999999873689376e-05
        entropy: 7.466643810272217
        entropy_coeff: 0.0
        kl: 0.01202976331114769
        model: {}
        policy_loss: -0.5705822110176086
        total_loss: -0.5036413073539734
        vf_explained_var: 0.298409640789032
        vf_loss: 0.051525361835956573
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.7319045066833496
        

blue_0DOWN
LOSE
blue_0 True True 286 -2.0 -0.5750000000000084
blue_1 True True 286 0.004 1.4009999999999978
agent_timesteps_total: 186078
custom_metrics: {}
date: 2021-12-09_05-48-28
done: false
episode_len_mean: 256.77
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.6322099999999686
episode_reward_min: -1.5949999999999998
episodes_this_iter: 1
episodes_total: 427
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.9221680164337158
        cur_lr: 4.999999873689376e-05
        entropy: 7.3959479331970215
        entropy_coeff: 0.0
        kl: 0.010215270332992077
        model: {}
        policy_loss: -0.29811903834342957
        total_loss: -0.0637461468577385
        vf_explained_var: 0.25666946172714233
        vf_loss: 0.21473738551139832
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.7319045066833496
    

blue_0DOWN
LOSE
blue_0 True True 403 -2.0 0.009999999999979359
blue_1 True True 403 0.007 2.0409999999999817
agent_timesteps_total: 189728
custom_metrics: {}
date: 2021-12-09_05-52-37
done: false
episode_len_mean: 258.88
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.6512599999999682
episode_reward_min: -1.5949999999999998
episodes_this_iter: 1
episodes_total: 432
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.9221680164337158
        cur_lr: 4.999999873689376e-05
        entropy: 7.19296407699585
        entropy_coeff: 0.0
        kl: 0.014063163660466671
        model: {}
        policy_loss: -0.1801186203956604
        total_loss: 0.03621001914143562
        vf_explained_var: 0.4773595631122589
        vf_loss: 0.1892968863248825
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.7319045066833496
        

blue_0DOWN
LOSE
blue_0 True True 209 -2.0 -0.9600000000000002
blue_1 True True 209 0.005 1.0449999999999997
LOSE
blue_0 False True 406 -0.995 1.029999999999979
blue_1 False True 406 -0.993 1.3149999999999902
agent_timesteps_total: 194202
custom_metrics: {}
date: 2021-12-09_05-57-37
done: false
episode_len_mean: 260.9
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.6783199999999688
episode_reward_min: -1.5949999999999998
episodes_this_iter: 2
episodes_total: 439
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.9221680164337158
        cur_lr: 4.999999873689376e-05
        entropy: 7.806046485900879
        entropy_coeff: 0.0
        kl: 0.013394455425441265
        model: {}
        policy_loss: -0.20756909251213074
        total_loss: 0.10951853543519974
        vf_explained_var: -0.07726705819368362
        vf_loss: 0.29134121537208557
    

blue_1DOWN
LOSE
blue_0 True True 89 0.005 0.4450000000000003
blue_1 True True 89 -2.0 -1.5599999999999996
blue_1DOWN
LOSE
blue_0 True True 172 0.005 0.8600000000000007
blue_1 True True 172 -2.0 -1.1449999999999994
agent_timesteps_total: 197710
custom_metrics: {}
date: 2021-12-09_06-01-26
done: false
episode_len_mean: 269.75
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.7696799999999672
episode_reward_min: -1.5949999999999998
episodes_this_iter: 2
episodes_total: 444
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.9610840082168579
        cur_lr: 4.999999873689376e-05
        entropy: 7.892664432525635
        entropy_coeff: 0.0
        kl: 0.017032625153660774
        model: {}
        policy_loss: -0.10833708941936493
        total_loss: -0.05411849915981293
        vf_explained_var: -0.4824533760547638
        vf_loss: 0.037848822772502

blue_0DOWN
LOSE
blue_0 True True 73 -2.0 -1.6399999999999997
blue_1 True True 73 0.005 0.3650000000000002
blue_0DOWN
LOSE
blue_0 True True 385 -2.0 -0.08000000000001894
blue_1 True True 385 0.005 1.9099999999999826
agent_timesteps_total: 201390
custom_metrics: {}
date: 2021-12-09_06-05-38
done: false
episode_len_mean: 277.02
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.8395899999999653
episode_reward_min: -1.5949999999999998
episodes_this_iter: 2
episodes_total: 450
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.9610840082168579
        cur_lr: 4.999999873689376e-05
        entropy: 6.649712562561035
        entropy_coeff: 0.0
        kl: 0.02497069537639618
        model: {}
        policy_loss: -0.3229932487010956
        total_loss: 0.15994703769683838
        vf_explained_var: 0.23836757242679596
        vf_loss: 0.4589413106441498


blue_1DOWN
LOSE
blue_0 True True 97 0.005 0.4850000000000003
blue_1 True True 97 -2.0 -1.5199999999999996
blue_1DOWN
LOSE
blue_0 True True 99 0.005 0.49500000000000033
blue_1 True True 99 -2.0 -1.5099999999999998
blue_0DOWN
LOSE
blue_0 True True 165 -2.0 -1.1799999999999993
blue_1 True True 165 0.005 0.8250000000000006
agent_timesteps_total: 204546
custom_metrics: {}
date: 2021-12-09_06-09-36
done: false
episode_len_mean: 267.49
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.7359799999999674
episode_reward_min: -1.5949999999999998
episodes_this_iter: 3
episodes_total: 458
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 2.1624388694763184
        cur_lr: 4.999999873689376e-05
        entropy: 5.826557636260986
        entropy_coeff: 0.0
        kl: 0.008989772759377956
        model: {}
        policy_loss: -0.6166762113571167
        total_l

blue_0DOWN
LOSE
blue_0 True True 180 -2.0 -1.1049999999999993
blue_1 True True 180 0.006 0.8970000000000007
LOSE
blue_0 False True 345 -0.995 0.7249999999999853
blue_1 False True 345 -0.993 0.9629999999999871
agent_timesteps_total: 208670
custom_metrics: {}
date: 2021-12-09_06-14-05
done: false
episode_len_mean: 271.44
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.778479999999967
episode_reward_min: -1.5949999999999998
episodes_this_iter: 2
episodes_total: 464
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 3.2436585426330566
        cur_lr: 4.999999873689376e-05
        entropy: 8.637691497802734
        entropy_coeff: 0.0
        kl: 0.007940794341266155
        model: {}
        policy_loss: -0.0763024240732193
        total_loss: 0.2535322904586792
        vf_explained_var: 0.22557395696640015
        vf_loss: 0.30407750606536865
    bl

blue_0DOWN
LOSE
blue_0 True True 77 -2.0 -1.6199999999999997
blue_1 True True 77 0.005 0.38500000000000023
blue_0DOWN
LOSE
blue_0 True True 469 -2.0 0.3399999999999723
blue_1 True True 469 0.005 2.4829999999999743
agent_timesteps_total: 212440
custom_metrics: {}
date: 2021-12-09_06-18-04
done: false
episode_len_mean: 274.79
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.8169999999999668
episode_reward_min: -1.5949999999999998
episodes_this_iter: 2
episodes_total: 469
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 0.8109146356582642
        cur_lr: 4.999999873689376e-05
        entropy: 6.348794460296631
        entropy_coeff: 0.0
        kl: 0.027079539373517036
        model: {}
        policy_loss: -0.19423630833625793
        total_loss: 0.22299952805042267
        vf_explained_var: 0.11767376959323883
        vf_loss: 0.3952766358852386

blue_1DOWN
LOSE
blue_0 True True 404 0.005 2.019999999999979
blue_1 True True 404 -2.0 0.03199999999998049
agent_timesteps_total: 214728
custom_metrics: {}
date: 2021-12-09_06-20-36
done: false
episode_len_mean: 277.53
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.8469199999999664
episode_reward_min: -1.5949999999999998
episodes_this_iter: 1
episodes_total: 472
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2163718938827515
        cur_lr: 4.999999873689376e-05
        entropy: 7.581292629241943
        entropy_coeff: 0.0
        kl: 0.013656528666615486
        model: {}
        policy_loss: -0.20826175808906555
        total_loss: -0.18901990354061127
        vf_explained_var: -0.37273842096328735
        vf_loss: 0.002630434464663267
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.461294412612915
    

blue_1DOWN
LOSE
blue_0 True True 210 0.005 1.0499999999999996
blue_1 True True 210 -2.0 -0.9550000000000003
blue_1DOWN
LOSE
blue_0 True True 555 0.005 2.774999999999963
blue_1 True True 555 -2.0 0.795999999999963
agent_timesteps_total: 218584
custom_metrics: {}
date: 2021-12-09_06-24-51
done: false
episode_len_mean: 285.27
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.9189799999999653
episode_reward_min: -1.5949999999999998
episodes_this_iter: 2
episodes_total: 478
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.2163718938827515
        cur_lr: 4.999999873689376e-05
        entropy: 8.116233825683594
        entropy_coeff: 0.0
        kl: 0.02547386847436428
        model: {}
        policy_loss: -0.08147978037595749
        total_loss: -0.024697907269001007
        vf_explained_var: -0.4282563328742981
        vf_loss: 0.0257961694151163

blue_1DOWN
LOSE
blue_0 True True 263 0.005 1.314999999999994
blue_1 True True 263 -2.001 -0.6740000000000042
agent_timesteps_total: 221990
custom_metrics: {}
date: 2021-12-09_06-29-02
done: false
episode_len_mean: 288.82
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 0.9600199999999649
episode_reward_min: -1.645
episodes_this_iter: 1
episodes_total: 485
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.999999873689376e-05
        entropy: 8.090588569641113
        entropy_coeff: 0.0
        kl: 0.010203276760876179
        model: {}
        policy_loss: -0.06135612726211548
        total_loss: -0.034979064017534256
        vf_explained_var: -0.562175452709198
        vf_loss: 0.007760592736303806
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 2.191941738128662
        cur_lr: 4

LOSE
blue_0 False True 517 -0.995 1.584999999999967
blue_1 False True 517 -0.996 1.419999999999988
agent_timesteps_total: 226412
custom_metrics: {}
date: 2021-12-09_06-33-52
done: false
episode_len_mean: 301.24
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 1.0766699999999618
episode_reward_min: -1.645
episodes_this_iter: 1
episodes_total: 490
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.999999873689376e-05
        entropy: 9.209187507629395
        entropy_coeff: 0.0
        kl: 0.012577062472701073
        model: {}
        policy_loss: -0.08159233629703522
        total_loss: -0.01580698974430561
        vf_explained_var: -0.5566800236701965
        vf_loss: 0.042837779968976974
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 2.191941738128662
        cur_lr: 4.999999873

blue_0DOWN
LOSE
blue_0 True True 437 -2.0 0.17999999999997573
blue_1 True True 437 0.005 2.1849999999999756
agent_timesteps_total: 230976
custom_metrics: {}
date: 2021-12-09_06-38-40
done: false
episode_len_mean: 309.7
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 1.16165999999996
episode_reward_min: -1.645
episodes_this_iter: 1
episodes_total: 496
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.999999873689376e-05
        entropy: 5.717077732086182
        entropy_coeff: 0.0
        kl: 0.01403795462101698
        model: {}
        policy_loss: -0.33661508560180664
        total_loss: -0.1499425321817398
        vf_explained_var: 0.2517496645450592
        vf_loss: 0.16105954349040985
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.095970869064331
        cur_lr: 4.9999998

blue_1DOWN
LOSE
blue_0 True True 185 0.005 0.9250000000000007
blue_1 True True 185 -2.0 -1.0959999999999992
LOSE
blue_0 False True 384 -0.995 0.9199999999999812
blue_1 False True 384 -0.993 1.2319999999999887
agent_timesteps_total: 233782
custom_metrics: {}
date: 2021-12-09_06-41-47
done: false
episode_len_mean: 315.22
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 1.2206799999999591
episode_reward_min: -1.645
episodes_this_iter: 2
episodes_total: 500
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.999999873689376e-05
        entropy: 8.385233879089355
        entropy_coeff: 0.0
        kl: 0.006313846912235022
        model: {}
        policy_loss: -0.3213863670825958
        total_loss: -0.286522775888443
        vf_explained_var: 0.31990617513656616
        vf_loss: 0.023343626409769058
    blue_1:
      

blue_0DOWN
LOSE
blue_0 True True 217 -2.0 -0.920000000000001
blue_1 True True 217 0.005 1.0849999999999989
blue_0DOWN
LOSE
blue_0 True True 275 -2.0 -0.6300000000000072
blue_1 True True 275 0.004 1.3679999999999934
agent_timesteps_total: 237626
custom_metrics: {}
date: 2021-12-09_06-46-05
done: false
episode_len_mean: 311.76
episode_media: {}
episode_reward_max: 5.584999999999989
episode_reward_mean: 1.1856599999999604
episode_reward_min: -1.645
episodes_this_iter: 2
episodes_total: 507
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.999999873689376e-05
        entropy: 5.921632289886475
        entropy_coeff: 0.0
        kl: 0.012864689342677593
        model: {}
        policy_loss: -0.32412585616111755
        total_loss: -0.008444759994745255
        vf_explained_var: 0.4558350741863251
        vf_loss: 0.2922087013721466
    blue_1:

blue_0DOWN
LOSE
blue_0 True True 69 -2.0 -1.6599999999999997
blue_1 True True 69 0.005 0.3450000000000002
blue_0DOWN
LOSE
blue_0 True True 135 -2.0 -1.3299999999999996
blue_1 True True 135 0.005 0.6750000000000005
LOSE
blue_0 False True 375 -0.995 0.8749999999999821
blue_1 False True 375 -0.993 1.0939999999999865
agent_timesteps_total: 241492
custom_metrics: {}
date: 2021-12-09_06-50-50
done: false
episode_len_mean: 311.87
episode_media: {}
episode_reward_max: 4.146999999999943
episode_reward_mean: 1.1595699999999605
episode_reward_min: -1.645
episodes_this_iter: 3
episodes_total: 515
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.999999873689376e-05
        entropy: 7.9461283683776855
        entropy_coeff: 0.0
        kl: 0.01846078224480152
        model: {}
        policy_loss: -0.12430548667907715
        total_loss: 0.422805964946

blue_1DOWN
LOSE
blue_0 True True 155 0.005 0.7750000000000006
blue_1 True True 155 -2.0 -1.2299999999999995
blue_1DOWN
LOSE
blue_0 True True 175 0.005 0.8750000000000007
blue_1 True True 175 -2.0 -1.1299999999999994
agent_timesteps_total: 244070
custom_metrics: {}
date: 2021-12-09_06-53-29
done: false
episode_len_mean: 313.51
episode_media: {}
episode_reward_max: 4.146999999999943
episode_reward_mean: 1.17236999999996
episode_reward_min: -1.645
episodes_this_iter: 2
episodes_total: 519
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.999999873689376e-05
        entropy: 4.734434604644775
        entropy_coeff: 0.0
        kl: 0.008168928325176239
        model: {}
        policy_loss: 0.31565505266189575
        total_loss: 0.3314588963985443
        vf_explained_var: 0.9290408492088318
        vf_loss: 0.0008992197108455002
    blue_1:
 

blue_1DOWN
LOSE
blue_0 True True 159 0.005 0.7950000000000006
blue_1 True True 159 -2.0 -1.2099999999999995
LOSE
blue_0 False True 345 -0.995 0.7249999999999853
blue_1 False True 345 -0.993 0.9769999999999869
agent_timesteps_total: 248914
custom_metrics: {}
date: 2021-12-09_06-58-33
done: false
episode_len_mean: 317.04
episode_media: {}
episode_reward_max: 4.175999999999897
episode_reward_mean: 1.2150199999999591
episode_reward_min: -1.645
episodes_this_iter: 2
episodes_total: 526
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.824557900428772
        cur_lr: 4.999999873689376e-05
        entropy: 7.305724620819092
        entropy_coeff: 0.0
        kl: 0.010680785402655602
        model: {}
        policy_loss: -0.4170686602592468
        total_loss: -0.37242719531059265
        vf_explained_var: -0.09346547722816467
        vf_loss: 0.025153813883662224
    blue_1:
   

blue_1DOWN
LOSE
blue_0 True True 98 0.005 0.4900000000000003
blue_1 True True 98 -2.0 -1.5149999999999997
blue_0DOWN
LOSE
blue_0 True True 234 -2.0 -0.8350000000000029
blue_1 True True 234 0.005 1.169999999999997
agent_timesteps_total: 251756
custom_metrics: {}
date: 2021-12-09_07-01-42
done: false
episode_len_mean: 314.17
episode_media: {}
episode_reward_max: 4.175999999999897
episode_reward_mean: 1.183949999999959
episode_reward_min: -1.645
episodes_this_iter: 2
episodes_total: 531
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 2.7368369102478027
        cur_lr: 4.999999873689376e-05
        entropy: 6.0000715255737305
        entropy_coeff: 0.0
        kl: 0.009500646032392979
        model: {}
        policy_loss: -0.49622586369514465
        total_loss: -0.38682839274406433
        vf_explained_var: 0.020460238680243492
        vf_loss: 0.08339575678110123
    blue_1

blue_0DOWN
LOSE
blue_0 True True 158 -2.0 -1.2149999999999994
blue_1 True True 158 0.005 0.7900000000000006
blue_0DOWN
LOSE
blue_0 True True 203 -2.0 -0.9899999999999995
blue_1 True True 203 0.005 1.0150000000000003
agent_timesteps_total: 254448
custom_metrics: {}
date: 2021-12-09_07-04-48
done: false
episode_len_mean: 313.13
episode_media: {}
episode_reward_max: 4.175999999999897
episode_reward_mean: 1.170409999999959
episode_reward_min: -1.645
episodes_this_iter: 2
episodes_total: 536
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 2.7368369102478027
        cur_lr: 4.999999873689376e-05
        entropy: 4.609232425689697
        entropy_coeff: 0.0
        kl: 0.012094969861209393
        model: {}
        policy_loss: -0.33883148431777954
        total_loss: 0.14000444114208221
        vf_explained_var: 0.3173360526561737
        vf_loss: 0.4457339942455292
    blue_1:


LOSE
blue_0 False False 600 -0.995 1.9999999999999583
blue_1 False False 600 -0.995 2.0349999999999615
agent_timesteps_total: 258214
custom_metrics: {}
date: 2021-12-09_07-08-49
done: false
episode_len_mean: 311.13
episode_media: {}
episode_reward_max: 4.175999999999897
episode_reward_mean: 1.1453099999999592
episode_reward_min: -1.645
episodes_this_iter: 1
episodes_total: 541
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 2.7368369102478027
        cur_lr: 4.999999873689376e-05
        entropy: 8.7867431640625
        entropy_coeff: 0.0
        kl: 0.0029009999707341194
        model: {}
        policy_loss: -0.38154640793800354
        total_loss: -0.3630065619945526
        vf_explained_var: -0.408381849527359
        vf_loss: 0.010600285604596138
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.8494508266448975
        cur_lr: 4.999999

blue_0DOWN
LOSE
blue_0 True True 182 -2.0 -1.0949999999999993
blue_1 True True 182 0.005 0.9100000000000007
LOSE
blue_0 False True 389 -0.995 0.9449999999999806
blue_1 False True 389 -0.993 1.1929999999999872
agent_timesteps_total: 260938
custom_metrics: {}
date: 2021-12-09_07-11-41
done: false
episode_len_mean: 313.86
episode_media: {}
episode_reward_max: 4.175999999999897
episode_reward_mean: 1.1757899999999588
episode_reward_min: -1.645
episodes_this_iter: 2
episodes_total: 545
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.3684184551239014
        cur_lr: 4.999999873689376e-05
        entropy: 7.207497596740723
        entropy_coeff: 0.0
        kl: 0.01744518056511879
        model: {}
        policy_loss: -0.12725695967674255
        total_loss: 0.2606959939002991
        vf_explained_var: -0.12363968044519424
        vf_loss: 0.36408063769340515
    blue_1:
     

LOSE
blue_0 False True 475 -0.995 1.3749999999999716
blue_1 False True 475 -0.996 1.6979999999999955
agent_timesteps_total: 264444
custom_metrics: {}
date: 2021-12-09_07-15-29
done: false
episode_len_mean: 313.69
episode_media: {}
episode_reward_max: 4.175999999999897
episode_reward_mean: 1.1781599999999586
episode_reward_min: -1.645
episodes_this_iter: 1
episodes_total: 551
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 3.0789413452148438
        cur_lr: 4.999999873689376e-05
        entropy: 7.688404560089111
        entropy_coeff: 0.0
        kl: 0.003574493806809187
        model: {}
        policy_loss: -0.44694870710372925
        total_loss: -0.4114289879798889
        vf_explained_var: -0.8193509578704834
        vf_loss: 0.024514008313417435
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.8494508266448975
        cur_lr: 4.999999

blue_0DOWN
LOSE
blue_0 True True 508 -2.0 0.5349999999999682
blue_1 True True 508 0.005 2.5269999999999695
agent_timesteps_total: 268732
custom_metrics: {}
date: 2021-12-09_07-19-51
done: false
episode_len_mean: 323.57
episode_media: {}
episode_reward_max: 4.175999999999897
episode_reward_mean: 1.2781799999999566
episode_reward_min: -1.645
episodes_this_iter: 1
episodes_total: 556
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.5394706726074219
        cur_lr: 4.999999873689376e-05
        entropy: 4.99015474319458
        entropy_coeff: 0.0
        kl: 0.008565478026866913
        model: {}
        policy_loss: -0.4983275532722473
        total_loss: -0.46636873483657837
        vf_explained_var: 0.26920872926712036
        vf_loss: 0.018772434443235397
    blue_1:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 1.8494508266448975
        cur_lr: 4.9

blue_1DOWN
LOSE
blue_0 True True 123 0.005 0.6150000000000004
blue_1 True True 123 -2.0 -1.3899999999999997
LOSE
blue_0 False True 419 -0.995 1.0949999999999775
blue_1 False True 419 -0.993 1.4449999999999932
agent_timesteps_total: 271574
custom_metrics: {}
date: 2021-12-09_07-22-37
done: false
episode_len_mean: 326.64
episode_media: {}
episode_reward_max: 4.175999999999897
episode_reward_mean: 1.3113999999999564
episode_reward_min: -1.645
episodes_this_iter: 2
episodes_total: 560
experiment_id: 3258f0277f3a4fa98a0ca2d0d9f5979d
hostname: DESKTOP
info:
  learner:
    blue_0:
      custom_metrics: {}
      learner_stats:
        cur_kl_coeff: 2.309206008911133
        cur_lr: 4.999999873689376e-05
        entropy: 7.618456840515137
        entropy_coeff: 0.0
        kl: 0.009162690490484238
        model: {}
        policy_loss: -0.17531463503837585
        total_loss: -0.06399530917406082
        vf_explained_var: -0.35879993438720703
        vf_loss: 0.0901607945561409
    blue_1:
    