In [1]:
%matplotlib tk

import argparse
import gym
import datetime
import os
import random
import tempfile
import numpy as np
import pickle

import ray
from ray import tune
from ray.tune.logger import Logger, UnifiedLogger, pretty_print
from ray.rllib.env.multi_agent_env import make_multi_agent
from ray.rllib.examples.models.shared_weights_model import TF2SharedWeightsModel
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.ppo import ppo, PPOTrainer, PPOTFPolicy
from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
from ray.rllib.agents.a3c import a3c
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.policy import PolicySpec
from environment_rllib_3d1 import MyEnv
#from test_env_for_lstm import MyEnv
from settings.initial_settings import *
from settings.reset_conditions import reset_conditions

from tensorflow.keras.utils import plot_model
from modules.savers import save_conditions
from utility.result_env import render_env
from utility.terminate_uavsimproc import teminate_proc
from utility.latest_learned_file_path import latest_learned_file_path
from utility.read_wright_weights import save_weights
from utility.read_wright_weights import reload_weights
from utility.save_logs import save_logs
from utility.save_logs import save_hists
from utility.save_logs import save_env_info

import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf
import cv2
import ctypes
import warnings

# If UCAV.exe is already running, kill that process before starting.
teminate_proc.UAVsimprockill(proc_name="UCAV.exe")

# Ignore deprecation warnings from NumPy and Matplotlib.
for _category in (np.VisibleDeprecationWarning,
                  matplotlib.MatplotlibDeprecationWarning):
    warnings.filterwarnings("ignore", category=_category)

# Print arrays with 3 decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

# Experiment identity: used to build log / checkpoint / model paths.
PROJECT = "UCAV"
TRIAL_ID = 2
TRIAL = f"test_{TRIAL_ID}"

# Run switches.
EVAL_FREQ = 1      # evaluate when the training iteration is a multiple of this
CONTINUAL = True   # when True, previously saved policy weights are reloaded
NUM_EVAL = 1       # evaluation episodes run per evaluation round
def custom_log_creator(custom_path, custom_str):
    """Build a ``logger_creator`` callback that logs under ``custom_path``.

    Args:
        custom_path: Directory in which the per-run log directory is created.
            It is created on demand when the returned callback runs.
        custom_str: Label placed at the start of the log directory name.

    Returns:
        A function that takes a trainer config and returns a
        ``UnifiedLogger`` writing to a freshly created directory whose name
        starts with ``<custom_str>_<timestamp>``.
    """
    # The timestamp is fixed when this factory is called, not when the
    # returned callback is invoked.
    timestr = datetime.datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    logdir_prefix = "{}_{}".format(custom_str, timestr)

    def logger_creator(config):
        # exist_ok=True replaces the exists()/makedirs() pair, which could
        # fail if the directory appeared between the check and the creation.
        os.makedirs(custom_path, exist_ok=True)
        # mkdtemp appends a random suffix, so each call gets its own directory.
        logdir = tempfile.mkdtemp(prefix=logdir_prefix, dir=custom_path)
        return UnifiedLogger(config, logdir, loggers=None)

    return logger_creator

# Shut down any existing Ray session, then start a new one.
ray.shutdown()
ray.init(ignore_reinit_error=True, log_to_driver=False)

#ModelCatalog.register_custom_model('my_model', MyRNNUAVClass)

# Environment instance; its observation/action spaces are used by every policy.
eval_env = MyEnv()


def _ppo_lstm_policy(overrides):
    """Return a (policy class, obs space, action space, config) policy tuple.

    The per-policy config always starts with the LSTM model settings and is
    then extended with ``overrides`` (a new dict is built on every call).
    """
    policy_config = {
        "model": {"vf_share_layers": False, "use_lstm": True, "max_seq_len": 200},
    }
    policy_config.update(overrides)
    return (PPOTFPolicy, eval_env.observation_space, eval_env.action_space,
            policy_config)


# Policies of the blue side: stochastic sampling with exploration enabled.
# The red policies are not part of this table; they live in policies_enem.
policies_own = {
    agent_id: _ppo_lstm_policy({
        "exploration_config": {"type": "StochasticSampling", "random_timesteps": 0},
        "explore": True,
    })
    for agent_id in ("blue_0", "blue_1")
}

# Policies of the red side: exploration disabled.
policies_enem = {
    agent_id: _ppo_lstm_policy({"explore": False})
    for agent_id in ("red_0", "red_1")
}

def policy_mapping_fn(agent_id, episode, **kwargs):
    """Map an agent to the policy that carries the same id.

    ``episode`` and any extra keyword arguments are accepted but not used.
    """
    return agent_id

def _make_ppo_config(policies):
    """Return the PPO config overrides shared by both trainers.

    Only the overridden keys are listed; the earlier
    ``ppo.DEFAULT_CONFIG.copy()`` assignments were immediately overwritten
    and therefore had no effect.
    NOTE(review): RLlib is expected to merge this partial dict over the PPO
    defaults when the trainer is built - confirm for the installed version.

    Args:
        policies: Mapping of policy id to policy tuple for the
            ``multiagent.policies`` entry.
    """
    return {
        "env": MyEnv,
        "num_gpus": 1,
        "num_workers": 0,
        "num_cpus_per_worker": 0,
        "num_gpus_per_worker": 1,
        "train_batch_size": 1200 * 25,
        "batch_mode": "complete_episodes",
        "gamma": 0.995,
        "lr": 2.5e-4,
        "shuffle_sequences": True,
        "observation_space": eval_env.observation_space,
        "action_space": eval_env.action_space,
        "sgd_minibatch_size": 600,
        "num_sgd_iter": 20,
        "multiagent": {"policies": policies, "policy_mapping_fn": policy_mapping_fn},
    }


# One config per side; they differ only in the policy table.
config_own = _make_ppo_config(policies_own)
config_enem = _make_ppo_config(policies_enem)

res_name = "test"
conditions_dir = f'./{PROJECT}/conditions/'
os.makedirs(conditions_dir, exist_ok=True)
save_conditions(conditions_dir)

# NOTE: for some reason PPOTrainer() fails with a TensorFlow eager-mode error
# when try_import_tf is used.

logs_dir = f"./{PROJECT}/logs"
trainer = ppo.PPOTrainer(config=config_own,
                         logger_creator=custom_log_creator(logs_dir, TRIAL))

adversary = ppo.PPOTrainer(config=config_enem,
                           logger_creator=custom_log_creator(logs_dir, TRIAL))

if CONTINUAL:
    # Continual learning: look up the checkpoint of the current trial.
    # The glob is derived from PROJECT/TRIAL so it follows TRIAL_ID instead of
    # being pinned to one trial directory.
    # model_path = PROJECT + '/checkpoints/' + TRIAL + '/checkpoint_000197/checkpoint-197'
    model_path = latest_learned_file_path(f'./{PROJECT}/checkpoints/{TRIAL}/*')

    # Alternative: restore the full trainer state and export the weights.
    #trainer.restore(checkpoint_path=model_path)
    #save_weights("blue_0",trainer)
    #save_weights("blue_1",trainer)

    # Alternative: load the red weights into the blue policies.
    #reload_weights(policy_id="red_0",trainer=trainer,set_policy_id="blue_0")
    #reload_weights(policy_id="red_1",trainer=trainer,set_policy_id="blue_1")
    reload_weights(policy_id="blue_0", trainer=trainer, set_policy_id="blue_0")
    reload_weights(policy_id="blue_1", trainer=trainer, set_policy_id="blue_1")
    #save_weights("red_0",trainer)
    #save_weights("red_1",trainer)


models_dir = f'./{PROJECT}/models/'
os.makedirs(models_dir, exist_ok=True)
hist_dir = f'./{PROJECT}/hist/'
os.makedirs(hist_dir, exist_ok=True)

# Dump a text summary and a diagram of each blue policy's network.
for j in range(2):
    policy_id = "blue_" + str(j)
    base_model = trainer.get_policy(policy_id).model.base_model
    text_name = models_dir + TRIAL + policy_id + '.txt'
    with open(text_name, "w") as fp:
        base_model.summary(print_fn=lambda x: fp.write(x + "\r\n"))
    # Include the policy id in the file name; previously both policies wrote
    # to the same .png and the second diagram overwrote the first.
    png_name = models_dir + TRIAL + policy_id + '.png'
    plot_model(base_model, to_file=png_name, show_shapes=True)


# Define checkpoint dir
check_point_dir = os.path.join('./' + PROJECT + '/checkpoints/', TRIAL)
os.makedirs(check_point_dir, exist_ok=True)

  for external in metadata.entry_points().get(self.group, []):

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Could not import from numba, which means that some
parts of this code may run MUCH more slowly.  You
may wish to install numba.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

2022-04-05 00:07:36,456	INFO trainer.py:2141 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
2022-04-05 00:07:36,458	INFO ppo.py:250 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2022-04-05 00:07:36,458	INFO trainer.py:781 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.




2022-04-05 00:07:50,665	INFO trainable.py:130 -- Trainable.setup took 14.212 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.




2022-04-05 00:08:03,989	INFO trainable.py:130 -- Trainable.setup took 13.279 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [None]:

eval_env.reset()
save_env_info(eval_env)
record_mode = 0
results_dir = f'./{PROJECT}/results/'
os.makedirs(results_dir, exist_ok=True)
results_file = results_dir + TRIAL + '.pkl'

# Glob over the checkpoints of the current trial. Derived from PROJECT/TRIAL
# so that saving (check_point_dir) and restoring always use the same trial;
# the previous literal was pinned to test_2 regardless of TRIAL_ID.
checkpoint_glob = f'./{PROJECT}/checkpoints/{TRIAL}/*'

# Length of each zero-initialised LSTM state vector.
# NOTE(review): must match the policies' LSTM cell size (no lstm_cell_size is
# set in the policy configs, so the library default applies) - confirm.
cell_size = 256
blue_ids = ("blue_0", "blue_1")

for steps in range(10001):
    # Training
    print(f'\n----------------- Training at steps:{steps} start! -----------------')
    eval_env.eval = False
    eval_env.reset()
    results = trainer.train()
    save_logs(res_name, results, steps, CONTINUAL)
    print(pretty_print(results))

    # Evaluation (only on iterations that are a multiple of EVAL_FREQ)
    if steps % EVAL_FREQ != 0:
        continue
    print(f'\n-------------- Evaluation at steps:{steps} starting ! --------------')

    check_point = trainer.save(checkpoint_dir=check_point_dir)
    for i in range(NUM_EVAL):
        model_path = latest_learned_file_path(checkpoint_glob)
        trainer.restore(checkpoint_path=model_path)
        eval_env.eval = True
        obs = eval_env.reset()

        step_num = 0
        # Fresh recurrent state (two zero vectors) per blue agent and episode.
        lstm_states = {
            agent_id: [np.zeros(cell_size, np.float32),
                       np.zeros(cell_size, np.float32)]
            for agent_id in blue_ids
        }
        if record_mode == 0:
            file_name = "test_num" + str(steps) + str(i)
            # Video recording is currently disabled:
            #video = cv2.VideoWriter(file_name+'.mp4',0x00000020,20.0,(800,600))

        while True:
            actions = {}
            for agent_id in blue_ids:
                # Item 0 of the result is the action, item 1 the updated
                # recurrent state that is fed back on the next step.
                result = trainer.compute_single_action(
                    obs[agent_id],
                    state=lstm_states[agent_id],
                    prev_action=None, prev_reward=None,
                    policy_id=agent_id, explore=False)
                actions[agent_id] = result[0]
                lstm_states[agent_id] = result[1]
            obs, rewards, dones, infos = eval_env.step(actions)

            # Accumulate the per-step position rows of blue / red / missiles.
            env_blue_pos_temp_mod, env_red_pos_temp_mod, env_mrm_pos_temp_mod = render_env.copy_from_env_mod(eval_env)
            if eval_env.timer == 1:
                # First step of the episode: start new histories.
                env_blue_pos_mod = env_blue_pos_temp_mod
                env_red_pos_mod = env_red_pos_temp_mod
                env_mrm_pos_mod = env_mrm_pos_temp_mod
            else:
                env_blue_pos_mod = np.vstack([env_blue_pos_mod, env_blue_pos_temp_mod])
                env_red_pos_mod = np.vstack([env_red_pos_mod, env_red_pos_temp_mod])
                env_mrm_pos_mod = np.vstack([env_mrm_pos_mod, env_mrm_pos_temp_mod])

            # Disabled frame capture for the video recording:
            #img = eval_env.render_movie(file_name,step_num)
            #video.write(img.astype('uint8'))

            step_num = step_num + 1

            done = dones["__all__"]

            # End-of-episode handling: store trajectories and policy weights.
            if done:
                save_hists("blue", steps, env_blue_pos_mod, hist_dir)
                save_hists("red", steps, env_red_pos_mod, hist_dir)
                save_hists("mrm", steps, env_mrm_pos_mod, hist_dir)
                save_weights("blue_0", trainer)
                save_weights("blue_1", trainer)
                break

        #if record_mode == 0:
        #    video.release()

ray.shutdown()

-------------------------- Scene: 0 --------------------------

----------------- Training at steps:0 start! -----------------
-------------------------- Scene: 0 --------------------------
-------------------------- Scene: 0 --------------------------
298 blue_0 Shoot at red_0 launch distance : 59323.498128835294 True True
311 blue_0 Shoot at red_0 launch distance : 52736.21868843753 True True
358 red_1 Shoot at blue_0
369 red_1 Shoot at blue_0
412 blue_0 Splash :red_0
423 blue_0: Destroyed
1125 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 1125 0.898999897848691 123.20104297013226
blue_1 False False 1125 -0.10700010215130895 12.156929864916068
-------------------------- Scene: 0 --------------------------
880 red_0 Shoot at blue_0
891 red_0 Shoot at blue_0
926 red_1 Shoot at blue_0
937 red_1 Shoot at blue_0
957 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 -0.10099846659283274 -21.301288776924427
blue_1 False False 1200 -0.0009984665928327297 -47.17588964176178
---------

202 blue_1 DOWN
382 red_0 Shoot at blue_0
393 red_0 Shoot at blue_0
407 red_1 Shoot at blue_0
418 red_1 Shoot at blue_0
434 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 434 -1.113001054361241 9.619367609328057
blue_1 False False 434 -0.10100105436124107 -1.505900876740975
-------------------------- Scene: 0 --------------------------
301 blue_0 Shoot at red_1 launch distance : 59602.47440319478 True True
Same tgt shoot
313 blue_0 Shoot at red_0 launch distance : 56988.87103744915 True True
429 blue_0 Splash :red_1
430 blue_0 Splash :red_0
WIN
blue_0 False True 430 28.873444650914095 140.95071665099465
blue_1 False True 430 12.791054902076887 7.450359374418604
-------------------------- Scene: 0 --------------------------
123 blue_0 DOWN
797 red_0 Shoot at blue_1
808 red_0 Shoot at blue_1
820 red_1 Shoot at blue_1
831 red_1 Shoot at blue_1
894 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 894 -0.1009984531958899 -1.2048257697158644
blue_1 False False 894 -0.1435065531958899 -37

WIN
blue_0 False True 540 12.121216255555554 138.77581334737917
blue_1 False True 540 12.221222222222222 118.23707641616123
-------------------------- Scene: 0 --------------------------
361 blue_1 Shoot at red_1 launch distance : 58236.12722574541 True True
Same tgt shoot
385 blue_1 Shoot at red_0 launch distance : 52509.5597689314 True True
414 blue_0 DOWN
426 red_0 Shoot at blue_1
437 red_0 Shoot at blue_1
488 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 488 0.39900010415860604 -4.026368526967968
blue_1 False False 488 0.35889700415860604 63.88023935280443
-------------------------- Scene: 0 --------------------------
487 red_1 Shoot at blue_1
498 red_1 Shoot at blue_1
502 red_0 Shoot at blue_1
513 red_0 Shoot at blue_1
557 blue_1: Destroyed
1017 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 1017 -1.1070006898486364 -43.673709035918534
blue_1 False False 1017 -0.10100068984863637 -17.29971089607237
-------------------------- Scene: 0 --------------------------
298 blue_1 Sh

336 blue_1 Shoot at red_0 launch distance : 58230.535680590474 True True
461 blue_1 Splash :red_0
487 red_1 Shoot at blue_0
498 red_1 Shoot at blue_0
576 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 0.8990004493472885 -2.9234084800603606
blue_1 False False 1200 0.8990004493472885 32.393166438243256
-------------------------- Scene: 0 --------------------------
185 blue_0 DOWN
375 red_0 Shoot at blue_1
386 red_0 Shoot at blue_1
386 red_1 Shoot at blue_1
397 red_1 Shoot at blue_1
448 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 448 -0.10100035979083972 -1.3046125666158817
blue_1 False False 448 -0.14145395979083975 -13.64308181355713
-------------------------- Scene: 0 --------------------------
720 red_1 Shoot at blue_1
727 red_0 Shoot at blue_1
731 red_1 Shoot at blue_1
738 red_0 Shoot at blue_1
785 blue_0 DOWN
817 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 817 -0.10099842468814772 -14.343605766096157
blue_1 False False 817 -0.14287325802148107 -32.682440



agent_timesteps_total: 60696
custom_metrics: {}
date: 2022-04-05_01-11-47
done: false
episode_len_mean: 674.4
episode_media: {}
episode_reward_max: 270.44983269536766
episode_reward_mean: 103.59936363342135
episode_reward_min: -119.8860543335177
episodes_this_iter: 45
episodes_total: 45
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.862703800201416
        entropy_coeff: 0.0
        kl: 0.08954828977584839
        model: {}
        policy_loss: 0.002263125032186508
        total_loss: 391.0355224609375
        vf_explained_var: -0.13413006067276
        vf_loss: 391.0153503417969
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.747198581695557
        entropy_coeff: 0.0
        kl: 0.058865465223789215
        model: {}
 

2022-04-05 01:11:47,800	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000001\checkpoint-1
2022-04-05 01:11:47,801	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 1, '_timesteps_total': 30348, '_time_total': 3815.873510837555, '_episodes_total': 45}


-------------------------- Scene: 0 --------------------------
274 blue_0 Shoot at red_0 launch distance : 59450.06966447068 True True
285 blue_0 Shoot at red_0 launch distance : 53819.06790109737 True True
301 red_1 Shoot at blue_0
312 red_1 Shoot at blue_0
356 blue_0: Destroyed
383 blue_0 Splash :red_0
TIME LIMIT LOSE
blue_0 False False 1200 0.8990006046521788 88.00677418550706
blue_1 False False 1200 0.8930006046521788 45.24577457423132

----------------- Training at steps:1 start! -----------------
-------------------------- Scene: 0 --------------------------
284 blue_0 Shoot at red_1 launch distance : 58380.603798420794 True True
295 blue_0 Shoot at red_1 launch distance : 53325.65576135979 True True
312 blue_1 Shoot at red_1 launch distance : 59555.74148335227 True True
Same tgt shoot
Same tgt shoot
Same tgt shoot
323 blue_1 Shoot at red_0 launch distance : 59744.52262899521 True True
342 red_0 Shoot at blue_0
353 red_0 Shoot at blue_0
386 blue_0: Destroyed
399 blue_0 Splash :re

459 blue_0 Splash :red_0
469 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 469 0.8587088165416379 124.63524719269171
blue_1 False False 469 0.8989994498749712 -0.40447173683591675
-------------------------- Scene: 0 --------------------------
275 blue_1 Shoot at red_1 launch distance : 58838.28545250363 True True
287 blue_1 Shoot at red_1 launch distance : 52856.431081739735 True True
299 blue_0 DOWN
323 red_0 Shoot at blue_1
334 red_0 Shoot at blue_1
386 blue_1 Splash :red_1
390 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 390 0.898998887766891 2.8644786453549345
blue_1 False False 390 0.8586517544335577 117.04506831051702
-------------------------- Scene: 0 --------------------------
284 blue_1 Shoot at red_0 launch distance : 58216.124655316635 True True
Same tgt shoot
296 blue_1 Shoot at red_1 launch distance : 56578.00777024969 True True
399 blue_1 Splash :red_0
405 blue_1 Splash :red_1
WIN
blue_0 False True 405 12.964284964572164 9.557234896296295
blue_1 False True

312 blue_1 Shoot at red_0 launch distance : 53358.07519207913 True True
354 red_1 Shoot at blue_1
365 red_1 Shoot at blue_1
416 blue_1 Splash :red_0
426 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 426 0.8989995167582163 -1.235344499985118
blue_1 False False 426 0.8585207834248829 116.58955650540292
-------------------------- Scene: 0 --------------------------
284 blue_1 Shoot at red_0 launch distance : 55580.066039665064 True True
Same tgt shoot
297 blue_1 Shoot at red_1 launch distance : 54498.82098087294 True True
388 blue_1 Splash :red_0
401 blue_1 Splash :red_1
WIN
blue_0 False True 401 12.993842896048365 12.812137003241896
blue_1 False True 401 29.478873069198823 148.3363662867016
-------------------------- Scene: 0 --------------------------
306 blue_0 Shoot at red_0 launch distance : 57822.67027488219 True True
Same tgt shoot
317 blue_0 Shoot at red_1 launch distance : 56403.35231133506 True True
423 blue_0 Splash :red_0
430 blue_0 Splash :red_1
WIN
blue_0 False True 4

232 blue_1 DOWN
296 blue_0 Shoot at red_0 launch distance : 58491.07997002205 True True
309 blue_0 Shoot at red_0 launch distance : 52793.57884825569 True True
352 red_1 Shoot at blue_0
363 red_1 Shoot at blue_0
416 blue_0 Splash :red_0
423 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 423 0.8586499853756914 120.21668871250559
blue_1 False False 423 0.8989994853756913 -0.5055387819865504
-------------------------- Scene: 0 --------------------------
299 blue_1 Shoot at red_0 launch distance : 57297.160983465896 True True
312 blue_1 Shoot at red_0 launch distance : 51086.38130985112 True True
Same tgt shoot
Same tgt shoot
361 blue_0 Shoot at red_1 launch distance : 59174.92403816186 True True
361 red_1 Shoot at blue_1
372 red_1 Shoot at blue_1
Same tgt shoot
373 blue_0 Shoot at red_0 launch distance : 58385.33799432189 True True
409 blue_1 Splash :red_0
427 blue_1: Destroyed
453 blue_0 Splash :red_1
WIN
blue_0 False True 573 12.093233437696334 127.389276482729
blue_1 False True 5

2022-04-05 02:19:04,376	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000002\checkpoint-2
2022-04-05 02:19:04,377	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 2, '_timesteps_total': 61291, '_time_total': 7747.268092393875, '_episodes_total': 98}


agent_timesteps_total: 122582
custom_metrics: {}
date: 2022-04-05_02-19-04
done: false
episode_len_mean: 625.4183673469388
episode_media: {}
episode_reward_max: 281.95243710512045
episode_reward_mean: 124.61735615291727
episode_reward_min: -119.8860543335177
episodes_this_iter: 53
episodes_total: 98
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.812302112579346
        entropy_coeff: 0.0
        kl: 0.08685697615146637
        model: {}
        policy_loss: -0.0067107295617461205
        total_loss: 323.0835876464844
        vf_explained_var: 0.13647034764289856
        vf_loss: 323.07293701171875
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.779872417449951
        entropy_coeff: 0.0
        kl: 0.05934510380029678
  

276 blue_1 Shoot at red_1 launch distance : 59301.21777025819 True True
Same tgt shoot
287 blue_1 Shoot at red_0 launch distance : 58992.811420200334 True True
393 blue_1 Splash :red_1
404 blue_1 Splash :red_0
WIN
blue_0 False True 404 12.870624605074058 1.063776563036301
blue_1 False True 404 29.31122349781333 157.73900951811663
-------------------------- Scene: 0 --------------------------
271 blue_1 Shoot at red_1 launch distance : 58476.93734171997 True True
Same tgt shoot
287 blue_1 Shoot at red_0 launch distance : 56263.200491476135 True True
382 blue_1 Splash :red_1
394 blue_1 Splash :red_0
WIN
blue_0 False True 394 13.047023260956976 45.16260151252115
blue_1 False True 394 29.637389685999274 150.70902821731914
-------------------------- Scene: 0 --------------------------
317 blue_1 Shoot at red_1 launch distance : 55141.583169665624 True True
Same tgt shoot
329 blue_1 Shoot at red_0 launch distance : 54979.404278631846 True True
427 blue_1 Splash :red_1
438 blue_1 Splash :red_

290 blue_1 Shoot at red_0 launch distance : 52410.78767735071 True True
313 red_1 Shoot at blue_1
324 red_1 Shoot at blue_1
341 blue_1 DOWN
392 blue_1 Splash :red_0
Same tgt shoot
Same tgt shoot
422 blue_0 Shoot at red_1 launch distance : 58835.96061293335 True True
Same tgt shoot
Same tgt shoot
434 blue_0 Shoot at red_1 launch distance : 53572.817283695214 True True
545 blue_0 Splash :red_1
WIN
blue_0 False True 634 11.892736946161936 108.7206283375992
blue_1 False True 634 11.791744479495268 90.49967393038077
-------------------------- Scene: 0 --------------------------
274 blue_1 Shoot at red_1 launch distance : 57143.313426241104 True True
Same tgt shoot
286 blue_1 Shoot at red_0 launch distance : 59417.057617309496 True True
387 blue_1 Splash :red_1
404 blue_1 Splash :red_0
WIN
blue_0 False True 404 12.970619473011505 10.847503996369635
blue_1 False True 404 29.41222246575078 147.48769111003918
-------------------------- Scene: 0 --------------------------
310 blue_0 Shoot at red

Same tgt shoot
Same tgt shoot
330 blue_0 Shoot at red_0 launch distance : 54680.56997224044 True True
330 red_0 Shoot at blue_1
371 blue_1 Splash :red_1
427 blue_0 Splash :red_0
WIN
blue_0 False True 530 12.163150943396225 130.75600580935992
blue_1 False True 530 12.264150943396226 154.81039570548057
-------------------------- Scene: 0 --------------------------
298 blue_0 Shoot at red_0 launch distance : 58874.13577281767 True True
310 blue_0 Shoot at red_0 launch distance : 53880.507249727 True True
366 red_1 Shoot at blue_0
377 red_1 Shoot at blue_0
415 blue_0 Splash :red_0
425 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 0.8989994255998186 89.54993118563509
blue_1 False False 1200 0.8929910255998186 -34.56776656058004
-------------------------- Scene: 0 --------------------------
314 blue_0 Shoot at red_0 launch distance : 59226.624290210246 True True
379 red_1 Shoot at blue_0
390 red_1 Shoot at blue_0
434 blue_0 Splash :red_0
449 blue_0: Destroyed
TIME LIMIT LOSE
blue

2022-04-05 03:25:11,391	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000003\checkpoint-3
2022-04-05 03:25:11,391	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 3, '_timesteps_total': 91589, '_time_total': 11600.123196601868, '_episodes_total': 148}


agent_timesteps_total: 183178
custom_metrics: {}
date: 2022-04-05_03-25-11
done: false
episode_len_mean: 597.57
episode_media: {}
episode_reward_max: 285.566401514844
episode_reward_mean: 127.81005400440996
episode_reward_min: -101.29010763644374
episodes_this_iter: 50
episodes_total: 148
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.816983222961426
        entropy_coeff: 0.0
        kl: 0.08263412863016129
        model: {}
        policy_loss: -0.014387767761945724
        total_loss: 271.1678466796875
        vf_explained_var: 0.14242324233055115
        vf_loss: 271.1657409667969
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.758408546447754
        entropy_coeff: 0.0
        kl: 0.06368786841630936
        model: 

Same tgt shoot
730 blue_0 Shoot at red_0 launch distance : 57748.24330613246 True True
830 blue_0 Splash :red_1
843 blue_0 Splash :red_0
WIN
blue_0 False True 843 24.770938874541535 181.74626983184555
blue_1 False True 843 11.323963785573563 4.495231902938607
-------------------------- Scene: 0 --------------------------
297 blue_0 Shoot at red_0 launch distance : 59845.578139016514 True True
306 blue_1 Shoot at red_0 launch distance : 58176.98743645911 True True
308 blue_0 Shoot at red_0 launch distance : 54862.9218167534 True True
Same tgt shoot
Same tgt shoot
Same tgt shoot
317 blue_1 Shoot at red_1 launch distance : 59229.24500631991 True True
414 blue_0 Splash :red_0
429 blue_1 Splash :red_1
WIN
blue_0 False True 429 13.298533300882195 147.48955034647813
blue_1 False True 429 28.89193889528779 143.92985295827506
-------------------------- Scene: 0 --------------------------
257 blue_1 DOWN
602 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 602 -1.1010094896210325 -7.69652581761963

180 blue_0 DOWN
371 blue_1 Shoot at red_0 launch distance : 48451.65903129015 True True
380 red_1 Shoot at blue_1
391 red_1 Shoot at blue_1
405 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 405 -0.10100011368331593 -1.4051258347932543
blue_1 False False 405 -0.6069966803499827 11.446305368599447
-------------------------- Scene: 0 --------------------------
294 blue_0 Shoot at red_0 launch distance : 59618.22847623305 True True
298 blue_1 Shoot at red_0 launch distance : 59570.172230285534 True True
307 blue_0 Shoot at red_0 launch distance : 53776.64463207128 True True
Same tgt shoot
Same tgt shoot
Same tgt shoot
309 blue_1 Shoot at red_1 launch distance : 52975.4431647893 True True
335 red_1 Shoot at blue_0
407 blue_1 Splash :red_1
411 blue_0 Splash :red_0
496 blue_0 DOWN
510 blue_1 DOWN
WIN
blue_0 False False 510 12.251941176470588 139.9266008681978
blue_1 False False 510 11.251941176470588 118.48699383994281
-------------------------- Scene: 0 --------------------------
303 blue_1

413 blue_1 Splash :red_0
418 blue_1 Splash :red_1
WIN
blue_0 False True 418 12.872162814206389 10.610392063795853
blue_1 False True 418 29.112796441798093 105.4927806059154
-------------------------- Scene: 0 --------------------------
303 blue_1 Shoot at red_0 launch distance : 59481.09218540011 True True
Same tgt shoot
314 blue_1 Shoot at red_1 launch distance : 55892.78390982719 True True
419 blue_1 Splash :red_0
419 blue_1 Splash :red_1
WIN
blue_0 False True 419 12.865646401371007 8.339094447175816
blue_1 False True 419 29.093576195722637 130.54013014152747
-------------------------- Scene: 0 --------------------------
208 blue_1 DOWN
842 red_1 Shoot at blue_0
853 red_0 Shoot at blue_0
853 red_1 Shoot at blue_0
864 red_0 Shoot at blue_0
910 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 910 -0.1386675619645451 -34.371087354158796
blue_1 False False 910 -0.10100096196454512 -1.40534571174391
-------------------------- Scene: 0 --------------------------
273 blue_1 Shoot at red

2022-04-05 04:30:25,994	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000004\checkpoint-4
2022-04-05 04:30:25,995	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 4, '_timesteps_total': 122334, '_time_total': 15471.448161125183, '_episodes_total': 192}


agent_timesteps_total: 244668
custom_metrics: {}
date: 2022-04-05_04-30-25
done: false
episode_len_mean: 651.66
episode_media: {}
episode_reward_max: 292.85939407473893
episode_reward_mean: 117.35186974216458
episode_reward_min: -112.06723377158221
episodes_this_iter: 44
episodes_total: 192
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.976182460784912
        entropy_coeff: 0.0
        kl: 0.06754723936319351
        model: {}
        policy_loss: -0.006183571182191372
        total_loss: 218.59771728515625
        vf_explained_var: 0.14233648777008057
        vf_loss: 218.5903778076172
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.627749919891357
        entropy_coeff: 0.0
        kl: 0.038239311426877975
        mod

364 red_1 Shoot at blue_0
372 red_0 Shoot at blue_0
375 red_1 Shoot at blue_0
383 red_0 Shoot at blue_0
436 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 -0.10100054214505558 11.586026320137462
blue_1 False False 1200 -0.11300054214505557 -67.96764078584116
-------------------------- Scene: 0 --------------------------
291 blue_1 Shoot at red_0 launch distance : 58283.60298372889 True True
303 blue_1 Shoot at red_0 launch distance : 52095.0994279529 True True
342 red_1 Shoot at blue_1
353 red_1 Shoot at blue_1
403 blue_1 Splash :red_0
413 blue_1: Destroyed
Same tgt shoot
Same tgt shoot
413 blue_0 Shoot at red_1 launch distance : 59963.27510365483 True True
Same tgt shoot
Same tgt shoot
424 blue_0 Shoot at red_1 launch distance : 55781.26657539409 True True
474 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 474 0.59900697506924 32.719274647311735
blue_1 False False 474 1.39900057506924 104.49513776295794
-------------------------- Scene: 0 --------------------------
284 blue

265 blue_1 Shoot at red_1 launch distance : 58405.43201648991 True True
276 blue_1 Shoot at red_1 launch distance : 53884.27637056181 True True
310 red_0 Shoot at blue_1
321 red_0 Shoot at blue_1
376 blue_1: Destroyed
380 blue_1 Splash :red_1
634 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 634 -0.10299769149092541 32.64100596311374
blue_1 False False 634 0.8989991751757412 119.50027774032021
-------------------------- Scene: 0 --------------------------
302 blue_0 Shoot at red_1 launch distance : 56622.51435690843 True True
Same tgt shoot
314 blue_0 Shoot at red_0 launch distance : 54087.24976533424 True True
418 blue_0 Splash :red_1
422 blue_0 Splash :red_0
WIN
blue_0 False True 422 28.93117213259367 145.5526800147955
blue_1 False True 422 12.844968341124478 21.65484919573459
-------------------------- Scene: 0 --------------------------
279 blue_1 Shoot at red_1 launch distance : 59619.363684924145 True True
Same tgt shoot
294 blue_1 Shoot at red_0 launch distance : 57754.67065790

198 blue_1 DOWN
314 blue_0 Shoot at red_0 launch distance : 58708.69235433215 True True
325 blue_0 Shoot at red_0 launch distance : 54251.702640969204 True True
371 red_1 Shoot at blue_0
382 red_1 Shoot at blue_0
434 blue_0 Splash :red_0
445 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 445 0.8588073786571846 100.7345442579446
blue_1 False False 445 0.8989996453238513 -0.3037559470122837
-------------------------- Scene: 0 --------------------------
326 blue_1 Shoot at red_0 launch distance : 59387.61871960682 True True
347 blue_1 Shoot at red_0 launch distance : 51296.80874527188 True True
377 red_1 Shoot at blue_1
388 red_1 Shoot at blue_1
445 blue_1 Splash :red_0
449 blue_1: Destroyed
Same tgt shoot
Same tgt shoot
939 blue_0 Shoot at red_1 launch distance : 27793.284407694777 True True
Same tgt shoot
Same tgt shoot
978 blue_0 Shoot at red_1 launch distance : 23779.138489516306 True True
979 blue_0 Splash :red_1
WIN
blue_0 False True 1178 10.917675721561968 33.387400524728335


2022-04-05 05:35:17,699	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000005\checkpoint-5
2022-04-05 05:35:17,700	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 5, '_timesteps_total': 152390, '_time_total': 19256.02666068077, '_episodes_total': 234}


agent_timesteps_total: 304780
custom_metrics: {}
date: 2022-04-05_05-35-17
done: false
episode_len_mean: 700.31
episode_media: {}
episode_reward_max: 307.2531173513069
episode_reward_mean: 135.7234678370953
episode_reward_min: -112.06723377158221
episodes_this_iter: 42
episodes_total: 234
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.767602443695068
        entropy_coeff: 0.0
        kl: 0.05757463350892067
        model: {}
        policy_loss: 0.023384220898151398
        total_loss: 191.66127014160156
        vf_explained_var: 0.05362660065293312
        vf_loss: 191.62637329101562
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.910634994506836
        entropy_coeff: 0.0
        kl: 0.0440521202981472
        model: 

326 blue_1 Shoot at red_0 launch distance : 52447.522713876446 True True
Same tgt shoot
Same tgt shoot
382 blue_0 Shoot at red_1 launch distance : 58647.69511020008 True True
388 red_1 Shoot at blue_1
399 red_1 Shoot at blue_1
428 blue_1 Splash :red_0
Same tgt shoot
Same tgt shoot
445 blue_0 Shoot at red_1 launch distance : 50362.76363403593 True True
510 blue_0 Splash :red_1
WIN
blue_0 False True 645 11.85946511627907 103.34359143588665
blue_1 False True 645 11.85946511627907 149.07518905179234
-------------------------- Scene: 0 --------------------------
312 blue_1 Shoot at red_0 launch distance : 57754.483381651145 True True
329 blue_1 Shoot at red_0 launch distance : 51158.24686541626 True True
376 red_1 Shoot at blue_1
387 red_1 Shoot at blue_1
430 blue_1 Splash :red_0
440 blue_1: Destroyed
547 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 547 -0.10100616501070725 -2.930729023899059
blue_1 False False 547 0.8989995349892929 87.75319589236618
-------------------------- Scene: 0 -

418 blue_1: Destroyed
527 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 527 -0.10699968366161094 107.76281503474361
blue_1 False False 527 0.8990003163383891 122.90166313552984
-------------------------- Scene: 0 --------------------------
300 blue_0 Shoot at red_0 launch distance : 59566.53068548715 True True
356 red_1 Shoot at blue_0
367 red_1 Shoot at blue_0
425 blue_0: Destroyed
457 blue_0 Splash :red_0
TIME LIMIT LOSE
blue_0 False False 1200 0.8989992461613993 82.02306852968815
blue_1 False False 1200 0.8929922461613993 16.30864728060422
-------------------------- Scene: 0 --------------------------
275 blue_1 Shoot at red_0 launch distance : 59817.412801938524 True True
Same tgt shoot
287 blue_1 Shoot at red_1 launch distance : 59689.011196980595 True True
386 blue_0 DOWN
389 blue_1 Splash :red_0
401 blue_1 Splash :red_1
WIN
blue_0 False True 401 12.892833234788771 16.31521917545367
blue_1 False True 401 29.47887064127256 150.39946628616102
-------------------------- Scene: 0 --

TIME LIMIT LOSE
blue_0 False False 265 -0.9029932013934381 -0.2174757867119086
blue_1 False False 265 -0.10100160139343811 -1.3037811571111508
-------------------------- Scene: 0 --------------------------
272 blue_1 Shoot at red_0 launch distance : 59867.11598114913 True True
290 blue_1 DOWN
Same tgt shoot
332 blue_0 Shoot at red_1 launch distance : 59679.05734266993 True True
Same tgt shoot
344 blue_0 Shoot at red_1 launch distance : 53917.69421748512 True True
389 blue_1 Splash :red_0
447 blue_0 Splash :red_1
WIN
blue_0 False True 544 12.104882352941177 131.15325310305286
blue_1 False True 544 12.104882352941177 45.75371212880246
-------------------------- Scene: 0 --------------------------
293 blue_0 Shoot at red_1 launch distance : 59274.16350786039 True True
304 blue_0 Shoot at red_1 launch distance : 54126.05676592204 True True
345 red_0 Shoot at blue_0
356 red_0 Shoot at blue_0
412 blue_0 Splash :red_1
417 blue_0: Destroyed
Same tgt shoot
Same tgt shoot
780 blue_1 Shoot at red

407 blue_1 Splash :red_1
418 blue_1 Splash :red_0
WIN
blue_0 False True 418 12.771165432412381 11.467142217139232
blue_1 False True 418 29.112792226670752 157.77549392152682
-------------------------- Scene: 0 --------------------------
206 blue_0 DOWN
281 blue_1 Shoot at red_0 launch distance : 59659.53300640364 True True
Same tgt shoot
293 blue_1 Shoot at red_1 launch distance : 58936.50611393451 True True
394 blue_1 Splash :red_0
406 blue_1 Splash :red_1
WIN
blue_0 False True 406 12.855981587131154 11.753412238606995
blue_1 False True 406 29.367311636392238 142.04661326560066
-------------------------- Scene: 0 --------------------------
286 blue_1 Shoot at red_0 launch distance : 59046.20543289425 True True
300 blue_1 Shoot at red_0 launch distance : 51943.61949751742 True True
329 red_1 Shoot at blue_1
340 red_1 Shoot at blue_1
400 blue_1 Splash :red_0
402 blue_1: Destroyed
464 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 464 -0.1030000987458286 11.839787064808663
blue_1 False F

2022-04-05 06:42:29,579	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000006\checkpoint-6
2022-04-05 06:42:29,579	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 6, '_timesteps_total': 182672, '_time_total': 23178.32150888443, '_episodes_total': 287}


agent_timesteps_total: 365344
custom_metrics: {}
date: 2022-04-05_06-42-29
done: false
episode_len_mean: 653.81
episode_media: {}
episode_reward_max: 307.2531173513069
episode_reward_mean: 150.9178936139061
episode_reward_min: -65.59248484135064
episodes_this_iter: 53
episodes_total: 287
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.7706217765808105
        entropy_coeff: 0.0
        kl: 0.048353683203458786
        model: {}
        policy_loss: -0.0011671418324112892
        total_loss: 234.86697387695312
        vf_explained_var: 0.023817891255021095
        vf_loss: 234.85848999023438
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.854300022125244
        entropy_coeff: 0.0
        kl: 0.04181686416268349
        mo

298 blue_0 Shoot at red_0 launch distance : 59238.744296659155 True True
307 blue_1 Shoot at red_0 launch distance : 59137.907360501136 True True
310 blue_0 Shoot at red_0 launch distance : 53630.22553517836 True True
318 blue_1 Shoot at red_0 launch distance : 53599.41961581799 True True
351 red_1 Shoot at blue_0
362 red_1 Shoot at blue_0
409 blue_0: Destroyed
413 blue_0 Splash :red_0
TIME LIMIT LOSE
blue_0 False False 1200 0.8989995119175147 100.66559669442174
blue_1 False False 1200 0.992991545250848 87.88437942431965
-------------------------- Scene: 0 --------------------------
289 blue_1 Shoot at red_0 launch distance : 59582.62499033177 True True
Same tgt shoot
300 blue_1 Shoot at red_1 launch distance : 56683.95931642075 True True
Same tgt shoot
303 blue_0 Shoot at red_0 launch distance : 57645.91212765764 True True
Same tgt shoot
314 blue_0 Shoot at red_0 launch distance : 52335.58053674851 True True
402 blue_1 Splash :red_0
405 blue_1 Splash :red_1
WIN
blue_0 False True 514 1

190 blue_0 DOWN
319 blue_1 Shoot at red_0 launch distance : 58294.343055476886 True True
371 blue_1 Shoot at red_0 launch distance : 40054.30838672402 True True
394 red_1 Shoot at blue_1
405 red_1 Shoot at blue_1
456 blue_1 Splash :red_0
457 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 457 0.8989998654378217 -0.5064617090280854
blue_1 False False 457 0.8583456321044884 77.30032037670934
-------------------------- Scene: 0 --------------------------
283 blue_1 Shoot at red_1 launch distance : 56829.724665193724 True True
Same tgt shoot
297 blue_0 Shoot at red_0 launch distance : 59673.62502591742 True True
Same tgt shoot
301 blue_1 Shoot at red_0 launch distance : 53175.72655783101 True True
Same tgt shoot
Same tgt shoot
309 blue_0 Shoot at red_1 launch distance : 51798.07500176317 True True
390 blue_1 Splash :red_1
402 blue_1 Splash :red_0
WIN
blue_0 False True 509 12.356563850687623 111.92581348402064
blue_1 False True 509 12.356563850687623 136.67780555826477
----------------

896 blue_1 DOWN
1049 red_0 Shoot at blue_0
1060 red_0 Shoot at blue_0
1089 red_1 Shoot at blue_0
1100 red_1 Shoot at blue_0
1110 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1110 -0.1360008978162427 -41.105603686186534
blue_1 False False 1110 -0.1010008978162427 -11.319328471004415
-------------------------- Scene: 0 --------------------------
269 blue_1 Shoot at red_0 launch distance : 59397.97055295848 True True
Same tgt shoot
282 blue_1 Shoot at red_1 launch distance : 50740.33325216149 True True
304 red_1 Shoot at blue_1
343 blue_1: Destroyed
374 blue_1 Splash :red_1
384 blue_1 Splash :red_0
WIN
blue_0 False True 384 13.125317370288455 12.933415266666666
blue_1 False True 384 29.27531140362179 111.51596539548066
-------------------------- Scene: 0 --------------------------
301 blue_0 Shoot at red_1 launch distance : 59061.22086856103 True True
Same tgt shoot
312 blue_0 Shoot at red_0 launch distance : 58214.98029103941 True True
Same tgt shoot
321 blue_1 Shoot at red_1 lau

2022-04-05 07:47:54,069	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000007\checkpoint-7
2022-04-05 07:47:54,069	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 7, '_timesteps_total': 212994, '_time_total': 26996.02397298813, '_episodes_total': 329}


agent_timesteps_total: 425988
custom_metrics: {}
date: 2022-04-05_07-47-53
done: false
episode_len_mean: 632.06
episode_media: {}
episode_reward_max: 290.01019010062015
episode_reward_mean: 158.65311881455236
episode_reward_min: -52.424932157190966
episodes_this_iter: 42
episodes_total: 329
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 7.030477523803711
        entropy_coeff: 0.0
        kl: 0.04103422909975052
        model: {}
        policy_loss: -0.003961527720093727
        total_loss: 160.8507537841797
        vf_explained_var: 0.5020807385444641
        vf_loss: 160.8465118408203
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.825240612030029
        entropy_coeff: 0.0
        kl: 0.043007589876651764
        model

316 blue_0 Shoot at red_0 launch distance : 58454.75950937337 True True
Same tgt shoot
329 blue_0 Shoot at red_1 launch distance : 53665.86663956832 True True
430 blue_0 Splash :red_0
433 blue_0 Splash :red_1
WIN
blue_0 False True 433 28.815457507342977 127.80558184198574
blue_1 False True 433 12.77173643413282 6.317989119938413
-------------------------- Scene: 0 --------------------------
197 blue_1 DOWN
722 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 722 -1.1050019055715714 -10.58249016284732
blue_1 False False 722 -0.10100150557157148 -1.4047430702793828
-------------------------- Scene: 0 --------------------------
253 blue_1 Shoot at red_1 launch distance : 59476.52932277592 True True
Same tgt shoot
265 blue_1 Shoot at red_0 launch distance : 58384.654050358804 True True
364 blue_1 Splash :red_1
376 blue_1 Splash :red_0
WIN
blue_0 False True 376 13.191811568557826 55.438667628368805
blue_1 False True 376 30.07478325862875 156.19912257851297
-------------------------- Scene: 0 

275 blue_1 Shoot at red_1 launch distance : 58146.549374315706 True True
290 blue_1 Shoot at red_1 launch distance : 51159.84802405716 True True
291 blue_0 Shoot at red_1 launch distance : 59098.970966999404 True True
Same tgt shoot
Same tgt shoot
Same tgt shoot
306 blue_0 Shoot at red_0 launch distance : 58567.14167295871 True True
329 red_0 Shoot at blue_1
387 blue_1 Splash :red_1
416 blue_0 Splash :red_0
WIN
blue_0 False True 529 12.26743100189036 116.07890967112112
blue_1 False True 529 12.26743100189036 151.83222675615346
-------------------------- Scene: 0 --------------------------
254 blue_1 Shoot at red_1 launch distance : 59438.877104630126 True True
275 blue_1 Shoot at red_1 launch distance : 49389.17029527625 True True
315 red_0 Shoot at blue_1
326 red_0 Shoot at blue_1
Same tgt shoot
Same tgt shoot
331 blue_0 Shoot at red_0 launch distance : 59593.570270825214 True True
Same tgt shoot
Same tgt shoot
343 blue_0 Shoot at red_0 launch distance : 54918.887720427585 True True
3

282 blue_0 Shoot at red_1 launch distance : 59291.43281581074 True True
295 blue_0 Shoot at red_1 launch distance : 53021.53709006902 True True
299 blue_1 Shoot at red_1 launch distance : 56179.13682401393 True True
Same tgt shoot
Same tgt shoot
Same tgt shoot
313 blue_1 Shoot at red_0 launch distance : 54005.93595697683 True True
400 blue_0 Splash :red_1
403 blue_1 Splash :red_0
WIN
blue_0 False True 403 13.478038237781188 130.21891572712985
blue_1 False True 403 29.33337322537424 130.21624901472285
-------------------------- Scene: 0 --------------------------
288 blue_1 Shoot at red_0 launch distance : 53648.98531151662 True True
306 blue_1 Shoot at red_0 launch distance : 44249.20744432904 True True
331 red_1 Shoot at blue_1
342 red_1 Shoot at blue_1
388 blue_1 Splash :red_0
402 blue_1: Destroyed
611 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 611 -0.10100600739755716 -6.278579555086988
blue_1 False False 611 0.8989992926024429 117.76938548714685
-------------------------- Scene

266 blue_1 Shoot at red_1 launch distance : 59792.877940251296 True True
Same tgt shoot
283 blue_1 Shoot at red_0 launch distance : 58092.9185317061 True True
Same tgt shoot
288 blue_0 Shoot at red_1 launch distance : 58857.2631053296 True True
Same tgt shoot
Same tgt shoot
305 blue_0 Shoot at red_0 launch distance : 57724.0184485772 True True
381 blue_1 Splash :red_1
398 blue_1 Splash :red_0
WIN
blue_0 False True 505 12.37623095709571 101.2199505570957
blue_1 False True 505 12.375237623762377 131.25584670928927
-------------------------- Scene: 0 --------------------------
328 blue_1 Shoot at red_0 launch distance : 59558.93512461311 True True
339 blue_1 Shoot at red_0 launch distance : 55000.22151175627 True True
387 red_1 Shoot at blue_1
398 red_1 Shoot at blue_1
447 blue_1 Splash :red_0
459 blue_1: Destroyed
492 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 492 -0.10299908280108583 -5.381704509929407
blue_1 False False 492 0.8989996171989142 122.68092150800503
--------------------

2022-04-05 08:54:28,435	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000008\checkpoint-8
2022-04-05 08:54:28,436	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 8, '_timesteps_total': 243093, '_time_total': 30885.7329082489, '_episodes_total': 382}


agent_timesteps_total: 486186
custom_metrics: {}
date: 2022-04-05_08-54-28
done: false
episode_len_mean: 632.53
episode_media: {}
episode_reward_max: 310.36215837209863
episode_reward_mean: 166.30890616426458
episode_reward_min: -52.424932157190966
episodes_this_iter: 53
episodes_total: 382
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.891984462738037
        entropy_coeff: 0.0
        kl: 0.04286186397075653
        model: {}
        policy_loss: -0.0034598778001964092
        total_loss: 176.2786102294922
        vf_explained_var: 0.3812878429889679
        vf_loss: 176.2735137939453
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.852017879486084
        entropy_coeff: 0.0
        kl: 0.0428326316177845
        model:

301 blue_1 Shoot at red_0 launch distance : 50954.66431418908 True True
303 blue_0 DOWN
337 red_1 Shoot at blue_1
348 red_1 Shoot at blue_1
383 blue_1 Splash :red_0
403 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 403 0.8989993157479936 -4.425895359234341
blue_1 False False 403 0.858523349081327 106.61566063255049
-------------------------- Scene: 0 --------------------------
291 blue_0 Shoot at red_1 launch distance : 57193.683389093596 True True
Same tgt shoot
307 blue_0 Shoot at red_0 launch distance : 59237.77640450111 True True
403 blue_0 Splash :red_1
430 blue_0 Splash :red_0
WIN
blue_0 False True 430 28.872447307772767 145.7986759441822
blue_1 False True 430 12.791051958935556 24.67294160775194
-------------------------- Scene: 0 --------------------------
245 blue_1 DOWN
305 blue_0 Shoot at red_1 launch distance : 59721.345594820945 True True
Same tgt shoot
329 blue_0 Shoot at red_0 launch distance : 50635.80368980317 True True
361 red_0 Shoot at blue_0
372 red_0 Shoot 

297 blue_1 Shoot at red_1 launch distance : 58980.13363762511 True True
Same tgt shoot
308 blue_1 Shoot at red_0 launch distance : 55420.32288026922 True True
Same tgt shoot
347 blue_0 Shoot at red_1 launch distance : 50974.2924860254 True True
Same tgt shoot
Same tgt shoot
371 blue_0 Shoot at red_0 launch distance : 56388.812464967545 True True
409 blue_1 Splash :red_0
410 blue_1 Splash :red_1
WIN
blue_0 False True 571 12.101576182136602 60.60182144880341
blue_1 False True 571 12.100576182136603 142.83945451220137
-------------------------- Scene: 0 --------------------------
257 blue_0 DOWN
278 blue_1 Shoot at red_0 launch distance : 59484.215434167556 True True
291 blue_1 Shoot at red_0 launch distance : 52833.96422055771 True True
334 red_1 Shoot at blue_1
345 red_1 Shoot at blue_1
390 blue_1 Splash :red_0
408 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 408 0.8989992210032874 -1.549105634330453
blue_1 False False 408 0.8584628876699542 120.77012156955423
------------------

823 red_0 Shoot at blue_1
824 red_1 Shoot at blue_1
834 red_0 Shoot at blue_1
835 red_1 Shoot at blue_1
898 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 -0.000999143979467283 -14.659696850958845
blue_1 False False 1200 -0.10099914397946728 -37.80195115334915
-------------------------- Scene: 0 --------------------------
221 blue_0 DOWN
573 red_0 Shoot at blue_1
577 red_1 Shoot at blue_1
584 red_0 Shoot at blue_1
588 red_1 Shoot at blue_1
663 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 663 -0.10099891377925654 -1.4058354460401563
blue_1 False False 663 -0.14227251377925654 -2.5019219058293443
-------------------------- Scene: 0 --------------------------
289 blue_1 Shoot at red_1 launch distance : 59693.22549895796 True True
300 blue_1 Shoot at red_1 launch distance : 54570.59102198315 True True
345 red_0 Shoot at blue_1
356 red_0 Shoot at blue_1
385 blue_1: Destroyed
404 blue_1 Splash :red_1
TIME LIMIT LOSE
blue_0 False False 1200 0.9929942135700298 10.51304943930

2022-04-05 10:00:26,371	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000009\checkpoint-9
2022-04-05 10:00:26,371	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 9, '_timesteps_total': 273375, '_time_total': 34729.992698431015, '_episodes_total': 428}


agent_timesteps_total: 546750
custom_metrics: {}
date: 2022-04-05_10-00-26
done: false
episode_len_mean: 615.81
episode_media: {}
episode_reward_max: 310.36215837209863
episode_reward_mean: 153.29989072099937
episode_reward_min: -102.55102456565987
episodes_this_iter: 46
episodes_total: 428
experiment_id: b7bc6bb0bdfa4636b9eb71264eb36a37
hostname: DESKTOP
info:
  learner:
    blue_0:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.99474573135376
        entropy_coeff: 0.0
        kl: 0.042333342134952545
        model: {}
        policy_loss: -0.011637118645012379
        total_loss: 178.7132568359375
        vf_explained_var: 0.15207991003990173
        vf_loss: 178.71641540527344
      train: null
    blue_1:
      learner_stats:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0002500000118743628
        entropy: 6.853607177734375
        entropy_coeff: 0.0
        kl: 0.0327489972114563
        model

292 blue_1 Shoot at red_1 launch distance : 59483.78996599791 True True
305 blue_1 Shoot at red_1 launch distance : 53360.58765198595 True True
324 blue_0 DOWN
334 red_0 Shoot at blue_1
345 red_0 Shoot at blue_1
401 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 401 0.39900015539004274 -2.326813573178129
blue_1 False False 401 0.35871218872337607 97.66816730201494
-------------------------- Scene: 0 --------------------------
283 blue_1 Shoot at red_0 launch distance : 58759.9947873943 True True
299 blue_1 Shoot at red_0 launch distance : 50603.11399173442 True True
319 blue_0 Shoot at red_0 launch distance : 58269.78305625306 True True
332 blue_0 Shoot at red_0 launch distance : 58682.57510312563 True True
336 red_1 Shoot at blue_1
347 red_1 Shoot at blue_1
395 blue_1 Splash :red_0
407 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 0.8929914524914128 79.298926435149
blue_1 False False 1200 0.8989990524914128 112.53876774153687
-------------------------- Scene: 0 -----

274 blue_1 Shoot at red_0 launch distance : 58914.28365502992 True True
Same tgt shoot
288 blue_1 Shoot at red_1 launch distance : 58735.52511121766 True True
304 blue_0 DOWN
388 blue_1 Splash :red_0
402 blue_1 Splash :red_1
WIN
blue_0 False True 402 12.885380957262011 11.683659485375385
blue_1 False True 402 29.355530210993354 158.08404059090628
-------------------------- Scene: 0 --------------------------
287 blue_0 Shoot at red_1 launch distance : 59332.87689314704 True True
Same tgt shoot
298 blue_0 Shoot at red_0 launch distance : 55161.44706546517 True True
Same tgt shoot
311 blue_1 Shoot at red_1 launch distance : 57795.394681434096 True True
Same tgt shoot
Same tgt shoot
323 blue_1 Shoot at red_0 launch distance : 53170.131676625555 True True
401 blue_0 Splash :red_1
401 blue_0 Splash :red_0
WIN
blue_0 False True 523 12.294455066921607 136.44073707340527
blue_1 False True 523 12.294455066921607 97.61370416692137
-------------------------- Scene: 0 --------------------------
28

339 red_1 Shoot at blue_1
Same tgt shoot
Same tgt shoot
348 blue_0 Shoot at red_1 launch distance : 53211.146765863756 True True
395 blue_1 Splash :red_0
453 blue_0 Splash :red_1
WIN
blue_0 False True 548 12.18878102189781 118.93204373359745
blue_1 False True 548 12.088781021897809 155.70097505565272
-------------------------- Scene: 0 --------------------------
326 blue_0 Shoot at red_0 launch distance : 59776.26407481486 True True
348 blue_0 Shoot at red_0 launch distance : 50523.25596327821 True True
407 red_1 Shoot at blue_0
418 red_1 Shoot at blue_0
447 blue_0 Splash :red_0
482 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 0.8989995920344721 86.10791164894657
blue_1 False False 1200 0.8930024587011388 -46.47866642745273
-------------------------- Scene: 0 --------------------------
163 blue_0 DOWN
370 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 370 -0.1010014254615441 -1.4053750164459375
blue_1 False False 370 -1.1049948921282107 -1.1772624125189737
----------------