In [1]:
%matplotlib tk
import argparse
import gym
import datetime
import os
import random
import tempfile
import numpy as np
import pickle

import ray
from ray import tune
from ray.tune.logger import Logger, UnifiedLogger, pretty_print
from ray.rllib.env.multi_agent_env import make_multi_agent
from ray.rllib.examples.models.shared_weights_model import TF2SharedWeightsModel
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.ppo import ppo, PPOTrainer, PPOTFPolicy
from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
from ray.rllib.agents.a3c import a3c
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.policy import PolicySpec
from environment_rllib_3d1 import MyEnv
#from test_env_for_lstm import MyEnv
from settings.initial_settings import *
from settings.reset_conditions import reset_conditions

from tensorflow.keras.utils import plot_model
from modules.savers import save_conditions
from utility.result_env import render_env
from utility.terminate_uavsimproc import teminate_proc
from utility.latest_learned_file_path import latest_learned_file_path
from utility.read_wright_weights import save_weights
from utility.read_wright_weights import reload_weights
from utility.save_logs import save_logs
from utility.save_logs import save_hists
from utility.save_logs import save_env_info

import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf
import cv2
import ctypes
import warnings

# Kill the UCAV.exe process if it is currently running.
teminate_proc.UAVsimprockill(proc_name="UCAV.exe")

# Silence the deprecation warnings emitted by NumPy and Matplotlib.
for _deprecation_category in (
    np.VisibleDeprecationWarning,
    matplotlib.MatplotlibDeprecationWarning,
):
    warnings.filterwarnings(action="ignore", category=_deprecation_category)

# Print arrays with three decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)
# Experiment identifiers; they are used to build the output paths.
PROJECT = "UCAV"
TRIAL_ID = 2
TRIAL = f"test_{TRIAL_ID}"

# Run an evaluation round whenever the training step is a multiple of this.
EVAL_FREQ = 1
# Number of evaluation episodes per evaluation round.
NUM_EVAL = 10
# When true, previously saved weights are reloaded before training starts.
CONTINUAL = True
def custom_log_creator(custom_path, custom_str):
    """Build a ``logger_creator`` callback that logs under ``custom_path``.

    Args:
        custom_path: Directory that holds the per-run log directories. It is
            created (including parents) the first time the callback runs.
        custom_str: Label placed at the front of every log directory name.

    Returns:
        A function that takes a trainer config and returns a
        ``UnifiedLogger`` writing into a newly created, uniquely named
        directory ``<custom_str>_<timestamp><random suffix>`` inside
        ``custom_path``.
    """
    # The timestamp is taken once, when this factory is called, not when the
    # returned callback is invoked.
    timestamp = datetime.datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    logdir_prefix = f"{custom_str}_{timestamp}"

    def logger_creator(config):
        # exist_ok=True replaces the former exists()/makedirs() pair, which
        # could fail if the directory appeared between the check and the call.
        os.makedirs(custom_path, exist_ok=True)
        logdir = tempfile.mkdtemp(prefix=logdir_prefix, dir=custom_path)
        return UnifiedLogger(config, logdir, loggers=None)

    return logger_creator

# Restart Ray so that this cell can be re-run against a fresh runtime.
ray.shutdown()
ray.init(ignore_reinit_error=True, log_to_driver=False)

#ModelCatalog.register_custom_model('my_model', MyRNNUAVClass)

# Environment instance whose observation/action spaces define the policies.
eval_env = MyEnv()


def _policy_spec(stochastic_exploration):
    """Return a fresh (policy class, obs space, action space, config) tuple.

    Every call builds new config dicts, so no two policies share one.
    With ``stochastic_exploration`` the config enables exploration through
    StochasticSampling; otherwise exploration is switched off.
    """
    overrides = {
        "model": {"vf_share_layers": False, "use_lstm": True, "max_seq_len": 200},
    }
    if stochastic_exploration:
        overrides["exploration_config"] = {"type": "StochasticSampling", "random_timesteps": 0}
        overrides["explore"] = True
    else:
        overrides["explore"] = False
    return (PPOTFPolicy, eval_env.observation_space, eval_env.action_space, overrides)


# Policies of the main trainer: exploring blue agents, non-exploring red agents.
policies_own = {
    "blue_0": _policy_spec(True),
    "blue_1": _policy_spec(True),
    "red_0": _policy_spec(False),
    "red_1": _policy_spec(False),
}
# Policies of the adversary trainer: the red agents only.
policies_enem = {
    "red_0": _policy_spec(False),
    "red_1": _policy_spec(False),
}
# policy_ids = list(policies.keys())

def policy_mapping_fn(agent_id, episode, **kwargs):
    """Map an agent to the policy that carries the same id.

    ``episode`` and any extra keyword arguments are accepted but not used.
    """
    return agent_id

# Trainer settings shared by both PPO trainers; only overrides are listed.
# No copy of ppo.DEFAULT_CONFIG is taken here: such a copy would be replaced
# by these dicts right away and never used.
# NOTE(review): this relies on PPOTrainer filling unspecified keys from its
# defaults - confirm against the installed RLlib version.
_shared_trainer_config = {
    "env": MyEnv, "num_gpus": 1, "num_workers": 0, "num_cpus_per_worker": 0, "num_gpus_per_worker": 1,
    "train_batch_size": 1200 * 25,
    "batch_mode": "complete_episodes",
    "gamma": 0.995, "lr": 2.5e-4, "shuffle_sequences": True,
    "observation_space": eval_env.observation_space, "action_space": eval_env.action_space,
    "sgd_minibatch_size": 600, "num_sgd_iter": 20,
}

# Config of the main trainer (blue and red policies).
config_own = {
    **_shared_trainer_config,
    "multiagent": {"policies": policies_own, "policy_mapping_fn": policy_mapping_fn},
}
# Config of the adversary trainer (red policies only); same batch size, since
# 600*5*10 == 1200*25.
config_enem = {
    **_shared_trainer_config,
    "multiagent": {"policies": policies_enem, "policy_mapping_fn": policy_mapping_fn},
}

# Name handed to save_logs() by the training loop.
res_name = "test"
# Directory passed to save_conditions(); the trailing slash is kept because
# the helper receives the string as-is.
conditions_dir = './' + PROJECT + '/conditions/'

# exist_ok=True makes the call safe when the directory already exists and
# avoids the exists()/makedirs() race.
os.makedirs(conditions_dir, exist_ok=True)
save_conditions(conditions_dir)

# For some reason PPOTrainer() fails with a TensorFlow eager-mode error when
# try_import_tf is used.

# Both trainers log below the same root; each one gets its own logger creator
# and therefore its own uniquely named log directory.
_log_root = os.path.expanduser(f"./{PROJECT}/logs")

trainer = ppo.PPOTrainer(
    config=config_own,
    logger_creator=custom_log_creator(_log_root, TRIAL),
)

adversary = ppo.PPOTrainer(
    config=config_enem,
    logger_creator=custom_log_creator(_log_root, TRIAL),
)

if CONTINUAL:
    # Continual learning: Need to specify the checkpoint
    # model_path = PROJECT + '/checkpoints/' + TRIAL + '/checkpoint_000197/checkpoint-197'
    # The glob pattern is built from PROJECT and TRIAL (instead of a literal
    # './UCAV/checkpoints/test_2/*') so it follows TRIAL_ID when that changes.
    model_path = latest_learned_file_path('./' + PROJECT + '/checkpoints/' + TRIAL + '/*')

    # NOTE(review): model_path is not consumed while the restore below stays
    # disabled.
    #trainer.restore(checkpoint_path=model_path)
    #save_weights("blue_0",trainer)
    #save_weights("blue_1",trainer)

    # NOTE(review): presumably reload_weights() loads the weights stored under
    # `policy_id` into the trainer policy `set_policy_id` - confirm in
    # utility.read_wright_weights. The call order is kept as written: the red
    # weights are applied to the blue policies first, then the blue weights.
    reload_weights(policy_id="red_0",trainer=trainer,set_policy_id="blue_0")
    reload_weights(policy_id="red_1",trainer=trainer,set_policy_id="blue_1")
    reload_weights(policy_id="blue_0",trainer=trainer,set_policy_id="blue_0")
    reload_weights(policy_id="blue_1",trainer=trainer,set_policy_id="blue_1")
    save_weights("red_0",trainer)
    save_weights("red_1",trainer)


# Output directories; exist_ok=True makes re-running the cell safe and avoids
# the exists()/makedirs() race.
models_dir = './' + PROJECT + '/models/'
os.makedirs(models_dir, exist_ok=True)
hist_dir = './' + PROJECT + '/hist/'
os.makedirs(hist_dir, exist_ok=True)

# Dump a text summary and an architecture plot for each blue policy network.
for j in range(2):
    policy_id = "blue_" + str(j)
    base_model = trainer.get_policy(policy_id).model.base_model

    text_name = models_dir + TRIAL + policy_id + '.txt'
    # newline="" disables newline translation, so the explicit "\r\n" is
    # written verbatim instead of becoming "\r\r\n" on Windows.
    with open(text_name, "w", newline="") as fp:
        base_model.summary(print_fn=lambda x: fp.write(x + "\r\n"))

    # The policy id is part of the file name so that the second plot does not
    # overwrite the first one.
    png_name = models_dir + TRIAL + policy_id + '.png'
    plot_model(base_model, to_file=png_name, show_shapes=True)



# Define checkpoint dir
check_point_dir = os.path.join('./' + PROJECT + '/checkpoints/', TRIAL)
os.makedirs(check_point_dir, exist_ok=True)

  for external in metadata.entry_points().get(self.group, []):

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Could not import from numba, which means that some
parts of this code may run MUCH more slowly.  You
may wish to install numba.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

2022-04-07 00:49:19,897	INFO trainer.py:2141 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
2022-04-07 00:49:19,899	INFO ppo.py:250 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2022-04-07 00:49:19,899	INFO trainer.py:781 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.




2022-04-07 00:49:48,779	INFO trainable.py:130 -- Trainable.setup took 28.885 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.




2022-04-07 00:50:01,878	INFO trainable.py:130 -- Trainable.setup took 13.049 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [None]:

# Reset the evaluation environment once and hand it to save_env_info().
eval_env.reset()
save_env_info(eval_env)
# Mode selector for the recording code in the evaluation loop.
record_mode = 0
results_dir = './' + PROJECT + '/results/'

# exist_ok=True makes re-running the cell safe and avoids the
# exists()/makedirs() race.
os.makedirs(results_dir, exist_ok=True)
# NOTE(review): results_file is not read in the visible part of the notebook.
results_file = results_dir + TRIAL + '.pkl'

# Best evaluation reward seen so far for each blue agent.
best_reward = {"blue_0":0,"blue_1":0}
# Main loop: each iteration runs one trainer.train() call, logs and
# checkpoints the result and, every EVAL_FREQ steps, plays NUM_EVAL evaluation
# episodes on eval_env with all four policies acting without exploration.
for steps in range(10001):
    # Training
    print(f'\n----------------- Training at steps:{steps} start! -----------------')
    # NOTE(review): the trainer was configured with the MyEnv class, so it is
    # not visible from here whether flags set on eval_env affect the training
    # rollouts - confirm.
    eval_env.eval = False
    eval_env.self_play = True
    eval_env.reset()
    results = trainer.train()
    save_logs(res_name,results,steps,CONTINUAL)
    print(pretty_print(results))
    # A checkpoint is written after every training step.
    check_point = trainer.save(checkpoint_dir=check_point_dir)
    # Evaluation
    if steps % EVAL_FREQ == 0:
        print(f'\n-------------- Evaluation at steps:{steps} starting ! --------------')
        # NOTE(review): EVAL_reward is reset once per evaluation round, not
        # per episode, so it accumulates over all NUM_EVAL episodes while
        # being compared with best_reward after every episode - confirm that
        # this is intended.
        EVAL_reward = {"blue_0":0,"blue_1":0}
        #check_point = trainer.save(checkpoint_dir=check_point_dir)
        for i in range(NUM_EVAL):
            # print(f'\nEvaluation {i}:')
            #model_path = latest_learned_file_path('./UCAV/checkpoints/test_2/*')
            #trainer.restore(checkpoint_path=model_path)
            eval_env.eval = True
            obs = eval_env.reset()
            # NOTE(review): done, step_num, ESC, trajectory_length and
            # file_name are assigned but never read in this loop.
            done = False
            
            step_num = 0
            #fig = plt.figure(1,figsize=(8.0, 6.0))
            ESC = 0x1B          # virtual key code of the ESC key
            trajectory_length = 100

            # Zero-initialised recurrent state (two vectors) for each policy.
            # assumes cell_size matches the LSTM size of the policy models - TODO confirm
            cell_size = 256
            state_0=[np.zeros(cell_size, np.float32),np.zeros(cell_size, np.float32)]
            state_1=[np.zeros(cell_size, np.float32),np.zeros(cell_size, np.float32)]
            state_2=[np.zeros(cell_size, np.float32),np.zeros(cell_size, np.float32)]
            state_3=[np.zeros(cell_size, np.float32),np.zeros(cell_size, np.float32)]
            # Placeholders; overwritten by compute_single_action() below.
            action_dict0 = [0,0]
            action_dict1 = [0,0]
            action_dict2 = [0,0]
            action_dict3 = [0,0]
            rewards = {"blue_0":0,"blue_1":0}
            if record_mode == 0:
                file_name = "test_num" + str(steps) +str(i)
                #video = cv2.VideoWriter(file_name+'.mp4',0x00000020,20.0,(800,600))

            while True:
                action_dict = {}
                # Query every policy without exploration. Element [0] of each
                # result is used as the action and element [1] as the next
                # recurrent state (see below).
                action_dict0 = trainer.compute_single_action(obs["blue_0"],
                                                             state=state_0,prev_action=None,prev_reward=None,
                                                             policy_id="blue_0",explore=False)
                action_dict1 = trainer.compute_single_action(obs["blue_1"],
                                                             state=state_1,prev_action=None,prev_reward=None,
                                                             policy_id="blue_1",explore=False)
                action_dict2 = trainer.compute_single_action(obs["red_0"],
                                                             state=state_2,prev_action=None,prev_reward=None,
                                                             policy_id="red_0",explore=False)
                action_dict3 = trainer.compute_single_action(obs["red_1"],
                                                             state=state_3,prev_action=None,prev_reward=None,
                                                             policy_id="red_1",explore=False)
                
                #action_dict0 = trainer.compute_single_action(obs["blue_0"],policy_id="blue_0")
                #action_dict1 = trainer.compute_single_action(obs["blue_1"],policy_id="blue_1")
                # Carry the recurrent states over to the next step.
                state_0 = action_dict0[1]
                state_1 = action_dict1[1]
                state_2 = action_dict2[1]
                state_3 = action_dict3[1]
                obs, rewards, dones, infos = eval_env.step({"blue_0": action_dict0[0],
                                                            "blue_1": action_dict1[0],
                                                            "red_0": action_dict2[0],
                                                            "red_1": action_dict3[0],})

                # Collect the per-step position records into one array per
                # group (blue / red / mrm) for this episode.
                # NOTE(review): the arrays are (re)started only when
                # eval_env.timer == 1; this presumably holds on the first step
                # after reset() - verify, otherwise rows from an earlier
                # episode are extended (or the names are undefined).
                env_blue_pos_temp_mod, env_red_pos_temp_mod, env_mrm_pos_temp_mod = render_env.copy_from_env_mod(eval_env)
                if eval_env.timer == 1:
                    env_blue_pos_mod = env_blue_pos_temp_mod
                    env_red_pos_mod = env_red_pos_temp_mod
                    env_mrm_pos_mod = env_mrm_pos_temp_mod
                else:
                    env_blue_pos_mod = np.vstack([env_blue_pos_mod,env_blue_pos_temp_mod])
                    env_red_pos_mod = np.vstack([env_red_pos_mod,env_red_pos_temp_mod])
                    env_mrm_pos_mod = np.vstack([env_mrm_pos_mod,env_mrm_pos_temp_mod])
                EVAL_reward["blue_0"] += rewards["blue_0"]
                EVAL_reward["blue_1"] += rewards["blue_1"]
                # plt.clf()

                # plt.subplots_adjust(left=-0.1,right=1.1,bottom=-0.1,top=1.1)
                # fig.canvas.draw()
                # plt.pause(.01)

                #if record_mode == 0:
                    #img = np.array(fig.canvas.renderer.buffer_rgba())
                    #img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
                    #video.write(img.astype('uint8'))

                
                step_num = step_num + 1
                
                done = dones["__all__"]
                
                #print(f'rewards:{rewards}')
                #if record_mode == 0:
                #    img = eval_env.render_movie(file_name,step_num)
                #    video.write(img.astype('unit8'))
                #elif record_mode == 1:
                #    eval_env.render()
                #elif record_mode == 2:
                #    eval_env.render()
                    
                #env_blue_pos_temp, env_red_pos_temp, env_mrm_pos_temp = render_env.copy_from_env(eval_env)
                
                #env_blue_pos.append(env_blue_pos_temp)
                #env_red_pos.append(env_red_pos_temp)
                #env_mrm_pos.append(env_mrm_pos_temp)
                #step_num = step_num + 1
                # Episode termination handling: store the collected position
                # histories of this episode and leave the step loop.
                if dones['__all__']:
                    save_hists("blue_"+str(i),steps,env_blue_pos_mod,hist_dir)
                    save_hists("red_"+str(i),steps,env_red_pos_mod,hist_dir)
                    save_hists("mrm_"+str(i),steps,env_mrm_pos_mod,hist_dir)
                    # print(f'all done at {env.steps}')
                    break
            # After each episode: when a blue agent's accumulated reward
            # exceeds its best value so far, save its weights, call
            # reload_weights() for the matching red id and record the new best.
            # NOTE(review): the exact effect of save_weights()/reload_weights()
            # is defined in utility.read_wright_weights - confirm there.
            if EVAL_reward["blue_0"]> best_reward["blue_0"]:
                save_weights("blue_0",trainer)
                reload_weights(policy_id="red_0",trainer=trainer,set_policy_id="blue_0")
                best_reward["blue_0"] = EVAL_reward["blue_0"]
            if EVAL_reward["blue_1"]> best_reward["blue_1"]:
                save_weights("blue_1",trainer)
                reload_weights(policy_id="red_1",trainer=trainer,set_policy_id="blue_1")
                best_reward["blue_1"] = EVAL_reward["blue_1"]

            
            #if record_mode == 0:
               # video.release()

# Shut the Ray runtime down once all training steps have finished.
ray.shutdown()

-------------------------- Scene: 0 --------------------------

----------------- Training at steps:0 start! -----------------
-------------------------- Scene: 0 --------------------------
-------------------------- Scene: 0 --------------------------
139 blue_1 DOWN
993 red_0 Shoot at blue_0
1004 red_0 Shoot at blue_0
1009 red_1 Shoot at blue_0
1020 red_1 Shoot at blue_0
1063 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1063 -0.137729876277264 -50.49046887302631
blue_1 False False 1063 -0.10099940961059735 -1.2030305315428063
-------------------------- Scene: 0 --------------------------
96 blue_0 DOWN
123 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 123 -0.10100182800254708 -1.4057956680719084
blue_1 False False 123 -1.1009973613358803 -1.3046351003204857
-------------------------- Scene: 0 --------------------------
286 blue_1 Shoot at red_1 launch distance : 59727.15648536425 True True
297 blue_1 Shoot at red_1 launch distance : 54594.60364631783 True True
339 red_0 Shoo

903 red_0 Shoot at blue_0
907 red_1 Shoot at blue_0
956 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 956 -0.14035539926209917 -27.81297584437308
blue_1 False False 956 -0.10099916592876584 -13.91186228490049
-------------------------- Scene: 0 --------------------------
169 blue_0 DOWN
286 blue_1 Shoot at red_1 launch distance : 59941.69851106115 True True
Same tgt shoot
297 blue_1 Shoot at red_0 launch distance : 53341.94656222876 True True
397 blue_1 Splash :red_0
402 blue_1 Splash :red_1
WIN
blue_0 False True 402 12.885403091251595 11.682153073743095
blue_1 False True 402 28.855552344982936 150.33780873097484
-------------------------- Scene: 0 --------------------------
215 blue_1 DOWN
1072 red_0 Shoot at blue_0
1083 red_0 Shoot at blue_0
1096 red_1 Shoot at blue_0
1107 red_1 Shoot at blue_0
1125 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1125 -0.13639441030852797 -58.83956388873293
blue_1 False False 1125 -0.10100097697519465 -1.3043427350525587
-----------------

TIME LIMIT LOSE
blue_0 False False 1200 0.9939995991934333 57.41493187135638
blue_1 False False 1200 0.8989995991934333 87.19214076258699
-------------------------- Scene: 0 --------------------------
160 blue_0 DOWN
287 blue_1 Shoot at red_0 launch distance : 58515.79836001424 True True
299 blue_1 Shoot at red_0 launch distance : 52252.87871330686 True True
323 red_1 Shoot at blue_1
334 red_1 Shoot at blue_1
388 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 388 -0.10099989022293851 -1.2036646903835808
blue_1 False False 388 0.35867057644372813 83.5051344706254
-------------------------- Scene: 0 --------------------------
296 blue_0 Shoot at red_1 launch distance : 59553.0030224472 True True
Same tgt shoot
306 blue_1 Shoot at red_0 launch distance : 59285.853108421274 True True
Same tgt shoot
309 blue_0 Shoot at red_1 launch distance : 54093.032920847756 True True
363 red_0 Shoot at blue_1
374 red_0 Shoot at blue_1
402 blue_0 Splash :red_1
429 blue_1: Destroyed
576 blue_0 DOWN




agent_timesteps_total: 62220
custom_metrics: {}
date: 2022-04-07_01-55-16
done: false
episode_len_mean: 740.7142857142857
episode_media: {}
episode_reward_max: 244.28145909301588
episode_reward_mean: 86.77617549669972
episode_reward_min: -60.143906623785504
episodes_this_iter: 42
episodes_total: 42
experiment_id: 67406e9c973b4678a42dadfef6f1ea34
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.898582935333252
        entropy_coeff: 0.0
        kl: 0.11648424714803696
        model: {}
        policy_loss: 0.06078886240720749
        total_loss: 84.18666076660156
        vf_explained_var: -0.09636684507131577
        vf_loss: 84.10257720947266
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.145285129547119
        entropy_coeff: 0.0
        kl: 0.07833901047706604
      

415 blue_1 Splash :red_1
428 blue_1 Splash :red_0
WIN
blue_0 False True 428 12.804086694288754 12.278733184423675
blue_1 False True 428 28.911564329802772 146.7465957833915
-------------------------- Scene: 0 --------------------------
289 blue_1 Shoot at red_1 launch distance : 57798.90334783686 True True
305 blue_1 Shoot at red_1 launch distance : 50123.46433540576 True True
341 red_0 Shoot at blue_1
352 red_0 Shoot at blue_1
400 blue_1 Splash :red_1
407 blue_1: Destroyed
Same tgt shoot
Same tgt shoot
719 blue_0 Shoot at red_0 launch distance : 36859.775174005204 True True
Same tgt shoot
Same tgt shoot
732 blue_0 Shoot at red_0 launch distance : 31933.0040788304 True True
787 blue_0 Splash :red_0
WIN
blue_0 False True 932 11.286560381402003 89.06324015459144
blue_1 False True 932 11.186553648068669 133.31934006690602
-------------------------- Scene: 0 --------------------------
304 blue_1 Shoot at red_0 launch distance : 59788.17698293599 True True
Same tgt shoot
316 blue_1 Shoot at

280 blue_0 Shoot at red_1 launch distance : 59893.32681708337 True True
292 blue_0 Shoot at red_1 launch distance : 54352.2640248587 True True
305 blue_1 Shoot at red_1 launch distance : 54772.53205078031 True True
Same tgt shoot
Same tgt shoot
Same tgt shoot
317 blue_1 Shoot at red_0 launch distance : 50745.819616471425 True True
325 red_0 Shoot at blue_0
336 red_0 Shoot at blue_0
399 blue_0 Splash :red_1
413 blue_1 Splash :red_0
WIN
blue_0 False True 536 12.137805970149254 148.59709056413382
blue_1 False True 536 12.237805970149255 123.7331900513434
-------------------------- Scene: 0 --------------------------
109 blue_1 DOWN
173 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 173 -1.1009984021710888 -1.4054858237128787
blue_1 False False 173 -0.10100120217108881 -1.305399932829955
-------------------------- Scene: 0 --------------------------
251 blue_0 DOWN
1067 red_1 Shoot at blue_1
1078 red_1 Shoot at blue_1
1103 red_0 Shoot at blue_1
1114 red_0 Shoot at blue_1
1123 blue_1: Destr

114 blue_1 DOWN
286 blue_0 Shoot at red_1 launch distance : 58881.146498206 True True
Same tgt shoot
297 blue_0 Shoot at red_0 launch distance : 57315.60638570151 True True
399 blue_0 Splash :red_1
405 blue_0 Splash :red_0
WIN
blue_0 False True 405 29.389227861934046 135.33817588287386
blue_1 False True 405 12.863301936008119 11.65966628390034
-------------------------- Scene: 0 --------------------------
47 blue_1 DOWN
508 blue_0 Shoot at red_1 launch distance : 59946.73100022062 True True
Same tgt shoot
519 blue_0 Shoot at red_0 launch distance : 58767.668860723985 True True
622 blue_0 Splash :red_1
632 blue_0 Splash :red_0
WIN
blue_0 False True 632 26.196686744114203 117.78490946530047
blue_1 False True 632 11.799226423017156 10.59564163090721
-------------------------- Scene: 0 --------------------------
299 blue_0 Shoot at red_0 launch distance : 58852.044549074126 True True
310 blue_0 Shoot at red_0 launch distance : 54166.34060796269 True True
352 red_1 Shoot at blue_0
363 red_1



agent_timesteps_total: 123172
custom_metrics: {}
date: 2022-04-07_03-18-44
done: false
episode_len_mean: 733.1666666666666
episode_media: {}
episode_reward_max: 275.97254731052624
episode_reward_mean: 85.70543377124307
episode_reward_min: -118.78976251663728
episodes_this_iter: 42
episodes_total: 84
experiment_id: 67406e9c973b4678a42dadfef6f1ea34
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.761950492858887
        entropy_coeff: 0.0
        kl: 0.11824727803468704
        model: {}
        policy_loss: 0.02042102813720703
        total_loss: 107.02874755859375
        vf_explained_var: 0.034133754670619965
        vf_loss: 106.98468017578125
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.204351425170898
        entropy_coeff: 0.0
        kl: 0.1277862936258316
    

580 blue_1 Splash :red_0
594 blue_1 Splash :red_1
WIN
blue_0 False True 594 12.020664002613627 7.979810153535354
blue_1 False True 594 26.461072909684333 148.97772182842215
-------------------------- Scene: 0 --------------------------
206 blue_1 DOWN
308 blue_0 Shoot at red_1 launch distance : 59072.5312661039 True True
319 blue_0 Shoot at red_1 launch distance : 53892.668716453336 True True
343 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 343 -0.20299397040787248 36.078888375196215
blue_1 False False 343 -0.10100080374120596 -1.3046135111984434
-------------------------- Scene: 0 --------------------------
285 blue_0 Shoot at red_0 launch distance : 57483.8006835532 True True
299 blue_0 Shoot at red_0 launch distance : 50498.35003717651 True True
Same tgt shoot
Same tgt shoot
331 blue_1 Shoot at red_1 launch distance : 59969.44443393134 True True
334 red_1 Shoot at blue_0
Same tgt shoot
Same tgt shoot
342 blue_1 Shoot at red_1 launch distance : 55322.73466258778 True True
345 red_1

386 blue_1 Splash :red_1
390 blue_1 Splash :red_0
WIN
blue_0 False True 390 13.07726906990449 20.508748710256413
blue_1 False True 390 29.73110629041731 145.51463367774682
-------------------------- Scene: 0 --------------------------
275 blue_1 Shoot at red_0 launch distance : 58777.296403050204 True True
Same tgt shoot
302 blue_1 Shoot at red_1 launch distance : 56446.276551935676 True True
Same tgt shoot
335 blue_0 Shoot at red_1 launch distance : 58460.26121682707 True True
353 red_1 Shoot at blue_1
364 red_1 Shoot at blue_1
393 blue_1 Splash :red_0
415 blue_1 DOWN
Same tgt shoot
433 blue_0 Shoot at red_1 launch distance : 54648.30180845932 True True
452 blue_0 Splash :red_1
621 blue_0 DOWN
WIN
blue_0 False False 621 10.831367149758453 111.06534892969916
blue_1 False False 621 11.831367149758453 103.71211443622872
-------------------------- Scene: 0 --------------------------
165 blue_0 DOWN
431 blue_1 Shoot at red_0 launch distance : 52809.504318355415 True True
Same tgt shoot
447

806 red_0 Shoot at blue_0
817 red_0 Shoot at blue_0
851 red_1 Shoot at blue_0
862 red_1 Shoot at blue_0
895 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 -0.1010009494336477 -27.80367580396684
blue_1 False False 1200 -0.1010009494336477 -72.5349022210426
-------------------------- Scene: 0 --------------------------
383 blue_1 Shoot at red_1 launch distance : 58505.55172539328 True True
394 blue_1 Shoot at red_1 launch distance : 53047.85573090203 True True
411 red_0 Shoot at blue_1
422 red_0 Shoot at blue_1
477 blue_1: Destroyed
496 blue_1 Splash :red_1
Same tgt shoot
Same tgt shoot
1109 blue_0 Shoot at red_0 launch distance : 59967.42515161827 True True
Same tgt shoot
Same tgt shoot
1120 blue_0 Shoot at red_0 launch distance : 57104.87836684804 True True
TIME LIMIT LOSE
blue_0 False False 1200 1.6990028038145184 56.82928638537477
blue_1 False False 1200 1.3989998371478518 108.17907904437234
-------------------------- Scene: 0 --------------------------
137 blue_1 DOWN
957



agent_timesteps_total: 183940
custom_metrics: {}
date: 2022-04-07_04-43-00
done: false
episode_len_mean: 705.35
episode_media: {}
episode_reward_max: 288.9730272876946
episode_reward_mean: 100.21977141933812
episode_reward_min: -118.78976251663728
episodes_this_iter: 43
episodes_total: 127
experiment_id: 67406e9c973b4678a42dadfef6f1ea34
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.138902187347412
        entropy_coeff: 0.0
        kl: 0.1325727254152298
        model: {}
        policy_loss: 0.04043620452284813
        total_loss: 124.72760772705078
        vf_explained_var: 0.09508690237998962
        vf_loss: 124.66065979003906
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.150270462036133
        entropy_coeff: 0.0
        kl: 0.09531896561384201
        model: 

277 blue_1 Shoot at red_1 launch distance : 57177.38977458421 True True
288 blue_1 Shoot at red_1 launch distance : 51760.98384422643 True True
330 red_0 Shoot at blue_1
341 red_0 Shoot at blue_1
365 blue_1 DOWN
386 blue_1 Splash :red_1
Same tgt shoot
Same tgt shoot
592 blue_0 Shoot at red_0 launch distance : 59494.70830202672 True True
Same tgt shoot
Same tgt shoot
603 blue_0 Shoot at red_0 launch distance : 54581.66629757736 True True
715 blue_0 Splash :red_0
WIN
blue_0 False True 803 11.494390114943961 130.99134997158717
blue_1 False True 803 11.39339601494396 102.20936214908292
-------------------------- Scene: 0 --------------------------
292 blue_1 Shoot at red_0 launch distance : 53416.56507543795 True True
Same tgt shoot
304 blue_1 Shoot at red_1 launch distance : 53721.94218892083 True True
395 blue_1 Splash :red_0
407 blue_1 Splash :red_1
WIN
blue_0 False True 407 12.948737115294112 6.706222915069609
blue_1 False True 407 29.345536612100013 150.03240087896413
----------------

352 blue_0 Shoot at red_1 launch distance : 57212.77461459193 True True
368 blue_0 Shoot at red_1 launch distance : 50356.18395631919 True True
467 blue_0 Splash :red_1
554 red_0 Shoot at blue_1
565 red_0 Shoot at blue_1
649 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 1.1929999867647245 97.41920019288057
blue_1 False False 1200 0.8989999867647244 -7.408198147026904
-------------------------- Scene: 0 --------------------------
298 blue_1 Shoot at red_0 launch distance : 55259.63432365835 True True
Same tgt shoot
309 blue_1 Shoot at red_1 launch distance : 56863.42296513345 True True
399 blue_1 Splash :red_0
416 blue_1 Splash :red_1
WIN
blue_0 False True 416 12.884938165694045 6.989940417948723
blue_1 False True 416 29.154169734924814 142.64313318116447
-------------------------- Scene: 0 --------------------------
170 blue_0 DOWN
303 blue_1 Shoot at red_1 launch distance : 56943.50938097126 True True
318 blue_1 Shoot at red_1 launch distance : 50205.62612321015 True True


132 blue_0 DOWN
333 blue_1 Shoot at red_1 launch distance : 56583.216699276214 True True
349 blue_1 Shoot at red_1 launch distance : 50078.38652124357 True True
383 red_0 Shoot at blue_1
394 red_0 Shoot at blue_1
446 blue_1 Splash :red_1
467 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 467 0.8989995266959702 -0.40525595073196874
blue_1 False False 467 0.8595108600293035 78.82792013560716
-------------------------- Scene: 0 --------------------------
278 blue_1 Shoot at red_0 launch distance : 58494.46887019378 True True
294 blue_1 Shoot at red_0 launch distance : 50715.10689458707 True True
335 red_1 Shoot at blue_1
346 red_1 Shoot at blue_1
392 blue_1 Splash :red_0
394 blue_1: Destroyed
Same tgt shoot
Same tgt shoot
747 blue_0 Shoot at red_1 launch distance : 27166.9725846061 True True
Same tgt shoot
Same tgt shoot
774 blue_0 Shoot at red_1 launch distance : 21502.243601544393 True True
786 blue_0 Splash :red_1
WIN
blue_0 False True 974 11.231039320876112 31.484488835379942
bl



agent_timesteps_total: 244244
custom_metrics: {}
date: 2022-04-07_06-07-38
done: false
episode_len_mean: 684.22
episode_media: {}
episode_reward_max: 288.9730272876946
episode_reward_mean: 107.11708595540837
episode_reward_min: -113.06630441236808
episodes_this_iter: 47
episodes_total: 174
experiment_id: 67406e9c973b4678a42dadfef6f1ea34
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.17786979675293
        entropy_coeff: 0.0
        kl: 0.121462881565094
        model: {}
        policy_loss: 0.024577980861067772
        total_loss: 109.26963806152344
        vf_explained_var: -0.12759947776794434
        vf_loss: 109.22077941894531
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.232185363769531
        entropy_coeff: 0.0
        kl: 0.06995505094528198
        model: 

194 blue_1 DOWN
953 red_1 Shoot at blue_0
957 red_0 Shoot at blue_0
964 red_1 Shoot at blue_0
968 red_0 Shoot at blue_0
1024 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1024 -0.13814011132648055 -44.12532379735881
blue_1 False False 1024 -0.10100004465981388 -1.3038074201709582
-------------------------- Scene: 0 --------------------------
300 blue_1 Shoot at red_0 launch distance : 59601.07821681777 True True
312 blue_1 Shoot at red_0 launch distance : 53506.787908470345 True True
357 red_1 Shoot at blue_1
368 red_1 Shoot at blue_1
372 blue_1 DOWN
414 blue_1 Splash :red_0
Same tgt shoot
Same tgt shoot
636 blue_0 Shoot at red_1 launch distance : 57999.63268482807 True True
Same tgt shoot
Same tgt shoot
647 blue_0 Shoot at red_1 launch distance : 53322.5317253394 True True
751 blue_0 Splash :red_1
WIN
blue_0 False True 847 11.41575915312869 88.2725265677756
blue_1 False True 847 11.315765053128688 86.77720070173753
-------------------------- Scene: 0 --------------------------


Same tgt shoot
305 blue_1 Shoot at red_0 launch distance : 54714.97525203673 True True
Same tgt shoot
Same tgt shoot
312 blue_0 Shoot at red_1 launch distance : 51223.606638446436 True True
405 blue_1 Splash :red_1
411 blue_1 Splash :red_0
WIN
blue_0 False True 512 12.34275 120.79699936666617
blue_1 False True 512 12.34275 122.98134138431979
-------------------------- Scene: 0 --------------------------
331 blue_1 Shoot at red_0 launch distance : 59472.760266371544 True True
342 blue_1 Shoot at red_0 launch distance : 53880.609739187326 True True
390 red_1 Shoot at blue_1
401 red_1 Shoot at blue_1
447 blue_1 Splash :red_0
457 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 0.9939992301321372 -10.186229993199108
blue_1 False False 1200 0.8989992301321372 113.43490763792542
-------------------------- Scene: 0 --------------------------
207 blue_1 DOWN
739 red_0 Shoot at blue_0
739 red_1 Shoot at blue_0
750 red_0 Shoot at blue_0
750 red_1 Shoot at blue_0
817 blue_0: Destroyed
TI

123 blue_1 DOWN
646 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 646 -1.100994469162396 -4.066139233463974
blue_1 False False 646 -0.10100106916239611 -1.3051639163241644
-------------------------- Scene: 0 --------------------------
295 blue_0 Shoot at red_1 launch distance : 53797.74122261665 True True
306 blue_0 Shoot at red_1 launch distance : 48451.2732488318 True True
Same tgt shoot
Same tgt shoot
310 blue_1 Shoot at red_0 launch distance : 58682.36707089443 True True
Same tgt shoot
Same tgt shoot
321 blue_1 Shoot at red_0 launch distance : 53846.75236969442 True True
340 red_0 Shoot at blue_0
397 blue_0 Splash :red_1
420 blue_1 Splash :red_0
WIN
blue_0 False True 540 12.221222222222222 138.68980393926614
blue_1 False True 540 12.222218255555555 115.24616423650797
-------------------------- Scene: 0 --------------------------
118 blue_1 DOWN
362 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 362 -1.1009985308474066 -3.8421225445969225
blue_1 False False 362 -0.10100133084740671

194 blue_0 DOWN
331 blue_1 Shoot at red_0 launch distance : 59587.16686553905 True True
Same tgt shoot
343 blue_1 Shoot at red_1 launch distance : 51193.671430938586 True True
366 red_1 Shoot at blue_1
434 blue_1 Splash :red_0
441 blue_1 Splash :red_1
WIN
blue_0 False True 566 12.019141342756184 10.916206417381176
blue_1 False True 566 12.019141342756184 138.95251426388833
-------------------------- Scene: 0 --------------------------
277 blue_1 Shoot at red_1 launch distance : 59253.52486972156 True True
Same tgt shoot
284 blue_0 Shoot at red_0 launch distance : 58541.705672945856 True True
Same tgt shoot
288 blue_1 Shoot at red_0 launch distance : 56596.77257129252 True True
Same tgt shoot
Same tgt shoot
295 blue_0 Shoot at red_1 launch distance : 50309.49883191507 True True
393 blue_1 Splash :red_1
398 blue_1 Splash :red_0
WIN
blue_0 False True 495 12.423242424242424 124.673491424242
blue_1 False True 495 12.423241324242424 114.69242007368554
-------------------------- Scene: 0 ----



agent_timesteps_total: 304308
custom_metrics: {}
date: 2022-04-07_07-31-52
done: false
episode_len_mean: 638.34
episode_media: {}
episode_reward_max: 273.4555977906113
episode_reward_mean: 104.09534199916172
episode_reward_min: -109.13912387070168
episodes_this_iter: 48
episodes_total: 222
experiment_id: 67406e9c973b4678a42dadfef6f1ea34
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.952114105224609
        entropy_coeff: 0.0
        kl: 0.10611312836408615
        model: {}
        policy_loss: 0.01581731252372265
        total_loss: 191.95326232910156
        vf_explained_var: 0.03356899693608284
        vf_loss: 191.9162139892578
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.354020118713379
        entropy_coeff: 0.0
        kl: 0.08048564940690994
        model: 

113 blue_1 DOWN
307 blue_0 Shoot at red_0 launch distance : 58410.688798288116 True True
318 blue_0 Shoot at red_0 launch distance : 52842.81005234402 True True
349 red_1 Shoot at blue_0
360 red_1 Shoot at blue_0
415 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 415 0.3588925548772347 99.72133167946676
blue_1 False False 415 0.3989999548772347 -0.9058459858091101
-------------------------- Scene: 0 --------------------------
125 blue_1 DOWN
165 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 165 -1.1009997197564747 -1.2035736788793092
blue_1 False False 165 -0.10100208642314144 -1.3049213154603767
-------------------------- Scene: 0 --------------------------
321 blue_0 Shoot at red_0 launch distance : 59631.44361723831 True True
Same tgt shoot
333 blue_0 Shoot at red_1 launch distance : 54785.89632051563 True True
439 blue_0 Splash :red_1
440 blue_0 Splash :red_0
WIN
blue_0 False True 440 28.082178923972194 154.29103000239525
blue_1 False True 440 12.727628169426742 3.1106405272

106 blue_1 DOWN
227 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 227 -1.1010057221774658 -1.4060578420177514
blue_1 False False 227 -0.10100158884413245 -1.4050391512509541
-------------------------- Scene: 0 --------------------------
327 blue_0 Shoot at red_0 launch distance : 59980.76635684963 True True
339 blue_1 Shoot at red_0 launch distance : 57791.34264165966 True True
341 blue_0 Shoot at red_0 launch distance : 54107.04389532869 True True
Same tgt shoot
Same tgt shoot
Same tgt shoot
397 blue_1 Shoot at red_1 launch distance : 46191.14972363225 True True
400 red_1 Shoot at blue_0
411 red_1 Shoot at blue_0
446 blue_0 Splash :red_0
484 blue_1 Splash :red_1
WIN
blue_0 False True 611 11.962993453355155 137.38640893930392
blue_1 False True 611 11.962993453355155 110.46691867263858
-------------------------- Scene: 0 --------------------------
205 blue_0 DOWN
291 blue_1 Shoot at red_1 launch distance : 54734.21491472551 True True
Same tgt shoot
304 blue_1 Shoot at red_0 launch dist

287 blue_0 Shoot at red_0 launch distance : 58844.86758368663 True True
Same tgt shoot
299 blue_0 Shoot at red_1 launch distance : 57710.08719199633 True True
402 blue_0 Splash :red_0
410 blue_0 Splash :red_1
WIN
blue_0 False True 410 29.280807017143918 155.2318795586094
blue_1 False True 410 12.92715138055855 10.759878868292681
-------------------------- Scene: 0 --------------------------
188 blue_1 DOWN
368 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 368 -1.1030013126729434 -3.5343439226887687
blue_1 False False 368 -0.10100171267294344 -1.3039640057698463
-------------------------- Scene: 0 --------------------------
115 blue_1 DOWN
296 blue_0 Shoot at red_0 launch distance : 58571.79097063543 True True
Same tgt shoot
308 blue_0 Shoot at red_1 launch distance : 59460.37319059354 True True
412 blue_0 Splash :red_0
427 blue_0 Splash :red_1
WIN
blue_0 False True 427 28.931243341758265 121.86940096739278
blue_1 False True 427 12.71063444246084 11.507283160879943
--------------------

307 blue_1 Shoot at red_0 launch distance : 48052.03767427843 True True
Same tgt shoot
Same tgt shoot
340 blue_0 Shoot at red_1 launch distance : 59777.45523926003 True True
345 red_1 Shoot at blue_1
Same tgt shoot
Same tgt shoot
353 blue_0 Shoot at red_1 launch distance : 54262.68173161067 True True
356 red_1 Shoot at blue_1
396 blue_1 Splash :red_0
449 blue_0 DOWN
457 blue_0 Splash :red_1
WIN
blue_0 False True 556 12.057273381294964 132.52580547545202
blue_1 False True 556 12.157273381294965 157.39612440856746
-------------------------- Scene: 0 --------------------------
291 blue_1 Shoot at red_1 launch distance : 57905.130609714586 True True
297 blue_0 Shoot at red_1 launch distance : 52857.741553393345 True True
303 blue_1 Shoot at red_1 launch distance : 53167.65853253008 True True
308 blue_0 Shoot at red_1 launch distance : 48425.06579683433 True True
346 red_0 Shoot at blue_0
357 red_0 Shoot at blue_0
398 blue_1 Splash :red_1
412 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False F



agent_timesteps_total: 365422
custom_metrics: {}
date: 2022-04-07_08-56-40
done: false
episode_len_mean: 633.87
episode_media: {}
episode_reward_max: 289.921929884021
episode_reward_mean: 101.94915877040357
episode_reward_min: -125.19173855777169
episodes_this_iter: 47
episodes_total: 269
experiment_id: 67406e9c973b4678a42dadfef6f1ea34
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.106440544128418
        entropy_coeff: 0.0
        kl: 0.08672266453504562
        model: {}
        policy_loss: 0.018908565863966942
        total_loss: 111.85910034179688
        vf_explained_var: 0.13868620991706848
        vf_loss: 111.82283782958984
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.42042350769043
        entropy_coeff: 0.0
        kl: 0.07520810514688492
        model: 

285 blue_0 Shoot at red_0 launch distance : 59120.48272167667 True True
288 blue_1 Shoot at red_0 launch distance : 55602.42760321751 True True
296 blue_0 Shoot at red_0 launch distance : 53716.544396753954 True True
303 blue_1 Shoot at red_0 launch distance : 48740.0785613364 True True
338 red_1 Shoot at blue_1
349 red_1 Shoot at blue_1
396 blue_1 Splash :red_0
397 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 0.994008619939092 58.7633679086321
blue_1 False False 1200 0.8990006532724253 90.8314252592898
-------------------------- Scene: 0 --------------------------
245 blue_0 DOWN
288 blue_1 Shoot at red_0 launch distance : 59359.30813328252 True True
302 blue_1 Shoot at red_0 launch distance : 53701.16718084647 True True
339 red_1 Shoot at blue_1
350 red_1 Shoot at blue_1
398 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 398 -0.10099991971150377 -3.5335497476647886
blue_1 False False 398 0.35899281362182955 76.85145209594938
-------------------------- Scene: 0 ----