In [1]:
%matplotlib tk

import argparse
import gym
import datetime
import os
import random
import tempfile
import numpy as np
import pickle

import ray
from ray import tune
from ray.tune.logger import Logger, UnifiedLogger, pretty_print
from ray.rllib.env.multi_agent_env import make_multi_agent
from ray.rllib.examples.models.shared_weights_model import TF2SharedWeightsModel
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.ppo import ppo, PPOTrainer, PPOTFPolicy
from ray.rllib.agents.impala.vtrace_tf_policy import VTraceTFPolicy
from ray.rllib.agents.impala import impala, ImpalaTrainer
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.policy import PolicySpec
from environment_rllib_3d import MyEnv
#from test_env_for_lstm import MyEnv
from settings.initial_settings import *
from settings.reset_conditions import reset_conditions
#from modules.models import MyConv2DModel_v0B_Small_CBAM_1DConv_Share
#from modules.models import MyRNNUAVClass
#from modules.models import DenseNetModelLarge
from tensorflow.keras.utils import plot_model
from modules.savers import save_conditions
from utility.result_env import render_env
from utility.terminate_uavsimproc import teminate_proc
from utility.latest_learned_file_path import latest_learned_file_path
from utility.save_logs import save_logs_IMPALA
from utility.save_logs import save_hists
from utility.save_logs import save_env_info

import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf
import cv2
import ctypes
import warnings

# If UCAV.exe is already running, kill that process before going any further.
teminate_proc.UAVsimprockill(proc_name="UCAV.exe")

# Ignore NumPy / Matplotlib deprecation warnings and print arrays compactly.
warnings.filterwarnings(action="ignore", category=np.VisibleDeprecationWarning)
warnings.filterwarnings(action="ignore", category=matplotlib.MatplotlibDeprecationWarning)
np.set_printoptions(suppress=True, precision=3)

# Experiment identification: PROJECT is the root folder for logs, models,
# checkpoints and results; TRIAL names this particular run.
PROJECT = "UCAV"
TRIAL_ID = 2
TRIAL = f"test_{TRIAL_ID}"

# Run an evaluation every EVAL_FREQ training iterations, NUM_EVAL episodes each.
EVAL_FREQ = 1
# When True, the trainer is restored from the latest checkpoint before training.
CONTINUAL = False
NUM_EVAL = 1
def custom_log_creator(custom_path, custom_str):
    """Return a logger-creator callback that writes its logs under ``custom_path``.

    The returned callable takes a trainer ``config`` and creates a fresh,
    uniquely named directory inside ``custom_path`` whose name starts with
    ``"<custom_str>_<timestamp>"``. It returns a ``UnifiedLogger`` bound to
    that directory.

    Args:
        custom_path: Parent directory for the log directories. It is created
            (including missing parents) when the callback runs, not when this
            factory is called.
        custom_str: Prefix for the log directory name (e.g. the trial name).

    Returns:
        A function ``logger_creator(config)`` suitable for a trainer's
        ``logger_creator`` argument.
    """
    # The timestamp is taken once, when the factory is called, so every log
    # directory produced by the returned callback shares the same prefix.
    timestr = datetime.datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    logdir_prefix = "{}_{}".format(custom_str, timestr)

    def logger_creator(config):
        # exist_ok=True replaces the former exists()/makedirs() pair, which
        # could raise if the directory appeared between the two calls.
        os.makedirs(custom_path, exist_ok=True)
        logdir = tempfile.mkdtemp(prefix=logdir_prefix, dir=custom_path)
        return UnifiedLogger(config, logdir, loggers=None)

    return logger_creator

# Shut down any existing Ray runtime, then start a new one.
ray.shutdown()
ray.init(ignore_reinit_error=True, log_to_driver=False)

#ModelCatalog.register_custom_model('my_model', MyRNNUAVClass)

# Environment instance used for evaluation; it also provides the observation
# and action spaces for the policy specs below.
eval_env = MyEnv()

# One V-trace (IMPALA) policy per blue agent, each with its own LSTM model
# configuration. PPOTFPolicy was used here in earlier experiments.
policies = {
    agent_name: (
        VTraceTFPolicy,
        eval_env.observation_space,
        eval_env.action_space,
        {"model": {"vf_share_layers": False, "use_lstm": True, "max_seq_len": 200}},
    )
    for agent_name in ("blue_0", "blue_1")
}
policy_ids = [*policies]

def policy_mapping_fn(agent_id, episode, **kwargs):
    """Return the policy id for ``agent_id``.

    Agents and policies share the same names, so the agent id is returned
    unchanged. ``episode`` and any extra keyword arguments are accepted but
    ignored.
    """
    return agent_id

# IMPALA trainer configuration. Only the overrides are listed; the previous
# `config = impala.DEFAULT_CONFIG.copy()` was overwritten immediately and has
# been dropped. Keys not given here fall back to the trainer's defaults.
config = {
    "env": MyEnv,
    "num_gpus": 0,
    "num_workers": 0,
    "num_cpus_per_worker": 0,
    "num_gpus_per_worker": 0,
    "train_batch_size": 600*5*2,
    "batch_mode": "complete_episodes",
    "gamma": 0.995,
    "lr": 2.5e-4,
    #"clip_actions":True,"normalize_actions":True,
    "observation_space": eval_env.observation_space,
    "action_space": eval_env.action_space,
    "explore": True,
    "rollout_fragment_length": 300,
    "num_sgd_iter": 20,
    "learner_queue_size": 300,
    #"sgd_minibatch_size": 300, "num_sgd_iter":20,
    #"exploration_config": {"type": "StochasticSampling","random_timesteps":0}, # PPO default: "random_timesteps":0
    #"model":{"fcnet_activation": "relu","fcnet_hiddens": [256, 256, 256],"post_fcnet_activation": "linear",
    #         "vf_share_layers": True,},#"linear","relu","tanh" "use_lstm":True,"lstm_cell_size":256,"max_seq_len":128
    "learner_queue_timeout": 900,
    #"model": {"custom_model": "my_model"},
    "multiagent": {"policies": policies, "policy_mapping_fn": policy_mapping_fn},
}
#res_name = "sgd"+str(config["sgd_minibatch_size"])+"sgd_num"+str(config["num_sgd_iter"])+"lr"+str(config["lr"])+"gamma"+str(config["gamma"])
res_name = "test"

# Save the run conditions next to the other project outputs.
conditions_dir = os.path.join('./' + PROJECT + '/conditions/')
os.makedirs(conditions_dir, exist_ok=True)
save_conditions(conditions_dir)

# NOTE: PPOTrainer() fails with a TensorFlow eager-mode error, for unknown
# reasons, when try_import_tf is used.

trainer = impala.ImpalaTrainer(config=config,
                               logger_creator=custom_log_creator(
                                   os.path.expanduser("./" + PROJECT + "/logs"), TRIAL))

if CONTINUAL:
    # Continual learning: resume from the latest checkpoint of this trial.
    # The pattern is derived from PROJECT/TRIAL rather than hard-coded, so
    # changing TRIAL_ID cannot silently restore another trial's weights.
    # model_path = PROJECT + '/checkpoints/' + TRIAL + '/checkpoint_000197/checkpoint-197'
    model_path = latest_learned_file_path('./' + PROJECT + '/checkpoints/' + TRIAL + '/*')
    trainer.restore(checkpoint_path=model_path)

# Dump a text summary and an architecture diagram for every policy's model.
models_dir = os.path.join('./' + PROJECT + '/models/')
os.makedirs(models_dir, exist_ok=True)
for policy_id in policy_ids:
    base_model = trainer.get_policy(policy_id).model.base_model
    text_name = models_dir + TRIAL + policy_id + '.txt'
    with open(text_name, "w") as fp:
        base_model.summary(print_fn=lambda x: fp.write(x + "\r\n"))
    # The policy id is part of the file name so that each policy gets its own
    # diagram (previously every policy wrote to the same .png).
    png_name = models_dir + TRIAL + policy_id + '.png'
    plot_model(base_model, to_file=png_name, show_shapes=True)



# Define checkpoint dir
check_point_dir = os.path.join('./' + PROJECT + '/checkpoints/', TRIAL)
os.makedirs(check_point_dir, exist_ok=True)

  for external in metadata.entry_points().get(self.group, []):

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Could not import from numba, which means that some
parts of this code may run MUCH more slowly.  You
may wish to install numba.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

2022-03-24 18:46:09,833	INFO trainer.py:2055 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
2022-03-24 18:46:09,833	INFO trainer.py:792 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [2]:
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  7 21:50:01 2022

@author: Takumi
"""
eval_env.reset()
save_env_info(eval_env)
record_mode = 0
results_dir = os.path.join('./' + PROJECT + '/results/')
os.makedirs(results_dir, exist_ok=True)
results_file = results_dir + TRIAL + '.pkl'

# Loop-invariant evaluation settings, hoisted out of the training loop.
ESC = 0x1B          # virtual-key code of the ESC key
trajectory_length = 100
# Length of each of the two LSTM state vectors passed to the policies.
# NOTE(review): presumably has to match the model's lstm_cell_size - confirm.
cell_size = 256
# Glob for this trial's checkpoints, derived from PROJECT/TRIAL instead of a
# hard-coded 'test_2', so evaluation always uses the checkpoints of this run.
checkpoint_glob = './' + PROJECT + '/checkpoints/' + TRIAL + '/*'

for steps in range(10001):
    # Training
    print(f'\n----------------- Training at steps:{steps} start! -----------------')
    eval_env.eval = False
    eval_env.reset()
    results = trainer.train()
    save_logs_IMPALA(res_name,results,steps,CONTINUAL)
    print(pretty_print(results))
    #check_point = trainer.save(checkpoint_dir=check_point_dir)
    # Evaluation
    if steps % EVAL_FREQ == 0:
        print(f'\n-------------- Evaluation at steps:{steps} starting ! --------------')

        check_point = trainer.save(checkpoint_dir=check_point_dir)
        for i in range(NUM_EVAL):
            # Reload the most recent checkpoint before every evaluation episode.
            model_path = latest_learned_file_path(checkpoint_glob)
            trainer.restore(checkpoint_path=model_path)
            eval_env.eval = True
            obs = eval_env.reset()

            step_num = 0

            # Fresh all-zero recurrent state for each policy at episode start.
            state_0 = [np.zeros(cell_size, np.float32), np.zeros(cell_size, np.float32)]
            state_1 = [np.zeros(cell_size, np.float32), np.zeros(cell_size, np.float32)]
            if record_mode == 0:
                # Only used by the video recording, which is currently disabled.
                file_name = "test_num" + str(steps) +str(i)
                #video = cv2.VideoWriter(file_name+'.mp4',0x00000020,20.0,(800,600))

            while True:
                # With a state argument, compute_single_action returns a
                # sequence: index 0 is the action, index 1 the next state.
                action_dict0 = trainer.compute_single_action(obs['blue_0'],
                                                             state=state_0,prev_action=None,prev_reward=None,
                                                             policy_id='blue_0',explore=False)
                action_dict1 = trainer.compute_single_action(obs['blue_1'],
                                                             state=state_1,prev_action=None,prev_reward=None,
                                                             policy_id='blue_1',explore=False)

                state_0 = action_dict0[1]
                state_1 = action_dict1[1]
                obs, rewards, dones, infos = eval_env.step({'blue_0': action_dict0[0], 'blue_1': action_dict1[0]})

                # Accumulate the position history of blue, red and mrm objects.
                # NOTE(review): the history is (re)started when eval_env.timer
                # is 1, i.e. this assumes the timer reads 1 right after the
                # first step of an episode - confirm against the environment.
                env_blue_pos_temp_mod, env_red_pos_temp_mod, env_mrm_pos_temp_mod = render_env.copy_from_env_mod(eval_env)
                if eval_env.timer == 1:
                    env_blue_pos_mod = env_blue_pos_temp_mod
                    env_red_pos_mod = env_red_pos_temp_mod
                    env_mrm_pos_mod = env_mrm_pos_temp_mod
                else:
                    env_blue_pos_mod = np.vstack([env_blue_pos_mod,env_blue_pos_temp_mod])
                    env_red_pos_mod = np.vstack([env_red_pos_mod,env_red_pos_temp_mod])
                    env_mrm_pos_mod = np.vstack([env_mrm_pos_mod,env_mrm_pos_temp_mod])

                # NOTE: live plotting and video recording of the evaluation
                # episode are currently disabled.

                step_num = step_num + 1

                # End-of-episode handling: store the recorded histories.
                if dones['__all__']:
                    save_hists("blue",steps,env_blue_pos_mod)
                    save_hists("red",steps,env_red_pos_mod)
                    save_hists("mrm",steps,env_mrm_pos_mod)
                    break

ray.shutdown()

-------------------------- Scene: 0 --------------------------

----------------- Training at steps:0 start! -----------------
-------------------------- Scene: 0 --------------------------
-------------------------- Scene: 0 --------------------------
99 blue_1 DOWN
115 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 115 -1.1009987981833425 -1.3061967681824052
blue_1 False False 115 -0.10100223151667576 -1.3051159839864197
-------------------------- Scene: 0 --------------------------
85 blue_0 DOWN
630 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 630 -0.10100134513402687 -1.608335877843007
blue_1 False False 630 -1.1010013451340268 -17.76602157661186
-------------------------- Scene: 0 --------------------------
89 blue_0 DOWN
923 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 923 -0.10100148430877032 -1.3052661463246176
blue_1 False False 923 -1.1050014843087703 -64.76242573296123
-------------------------- Scene: 0 --------------------------
279 blue_0 DOWN
1024 red_1 Shoot at blu



223 blue_0 DOWN
1016 red_0 Shoot at blue_1
1027 red_0 Shoot at blue_1
1028 red_1 Shoot at blue_1
1047 red_1 Shoot at blue_1
1081 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 1081 -0.1010008918407424 -3.1158764542310147
blue_1 False False 1081 -0.1360008918407424 -68.86263005469073
-------------------------- Scene: 0 --------------------------
76 blue_1 DOWN
590 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 590 -1.1030055907858782 -6.431403960728122
blue_1 False False 590 -0.10100152411921158 -1.3052062904000092
-------------------------- Scene: 0 --------------------------
46 blue_0 DOWN
473 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 473 -0.10100060153834309 -1.3049538261364526
blue_1 False False 473 -1.1010056015383431 -16.354724652381005
-------------------------- Scene: 0 --------------------------
74 blue_0 DOWN
759 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 759 -0.10100189669801576 -1.3062266776656757
blue_1 False False 759 -1.1010018966980157 -45.882235870861955




agent_timesteps_total: 25354
custom_metrics: {}
date: 2022-03-24_19-02-56
done: false
episode_len_mean: 667.2105263157895
episode_media: {}
episode_reward_max: 1.6517918000371157
episode_reward_mean: -34.22037495820505
episode_reward_min: -87.093776308753
episodes_this_iter: 19
episodes_total: 19
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 6.84153413772583
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.999996185302734
        model: {}
        policy_loss: -10.26709270477295
        var_gnorm: 44.39776611328125
        vf_explained_var: -0.05583345890045166
        vf_loss: 8.015331268310547
  learner_queue:
    size_count: 1
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 25354
  num_steps_sampled: 12677
  num_steps

2022-03-24 19:02:57,327	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000001\checkpoint-1
2022-03-24 19:02:57,327	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 1, '_timesteps_total': 12618, '_time_total': 995.4890348911285, '_episodes_total': 19}


-------------------------- Scene: 0 --------------------------
923 red_1 Shoot at blue_0
934 red_1 Shoot at blue_0
946 red_0 Shoot at blue_0
957 red_0 Shoot at blue_0
976 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 1200 -0.101001565014676 -33.11473671026916
blue_1 False False 1200 -0.113001565014676 -68.75366659091054

----------------- Training at steps:1 start! -----------------
-------------------------- Scene: 0 --------------------------
61 blue_0 DOWN
493 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 493 -0.1010013452225893 -1.204058995615827
blue_1 False False 493 -1.101000411889256 -7.404342828058382
-------------------------- Scene: 0 --------------------------
128 blue_0 DOWN
306 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 306 -0.10100165659543736 -1.3371830065725727
blue_1 False False 306 -1.101001623262104 -3.136056748172366
-------------------------- Scene: 0 --------------------------
68 blue_0 DOWN
120 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 120 -0.10

125 blue_1 DOWN
210 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 210 -1.1010046442665127 -2.743781112194087
blue_1 False False 210 -0.10100094426651271 -2.522428012174197
-------------------------- Scene: 0 --------------------------
67 blue_0 DOWN
94 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 94 -0.1010016625382386 -1.3049049506409096
blue_1 False False 94 -1.101006795871572 -1.4085116367357868
-------------------------- Scene: 0 --------------------------
56 blue_1 DOWN
240 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 240 -1.1009994075436151 -1.919352506760093
blue_1 False False 240 -0.10100150754361521 -1.3050714307351075
-------------------------- Scene: 0 --------------------------
192 blue_0 DOWN
460 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 460 -0.10100254835108967 -3.373963444976358
blue_1 False False 460 -1.101002381684423 -5.336009130539742
-------------------------- Scene: 0 --------------------------


2022-03-24 19:13:52,358	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000002\checkpoint-2
2022-03-24 19:13:52,358	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 2, '_timesteps_total': 25354, '_time_total': 1563.6867311000824, '_episodes_total': 43}


agent_timesteps_total: 37636
custom_metrics: {}
date: 2022-03-24_19-13-52
done: false
episode_len_mean: 437.6279069767442
episode_media: {}
episode_reward_max: 88.17366309896762
episode_reward_mean: -14.215156937304414
episode_reward_min: -87.093776308753
episodes_this_iter: 24
episodes_total: 43
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 7.60471248626709
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.8294349312782288
        var_gnorm: 44.39942169189453
        vf_explained_var: 0.5258656144142151
        vf_loss: 0.0715508908033371
  learner_queue:
    size_count: 3
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 37636
  num_steps_sampled: 18818
  num_steps_trained: 2535

45 blue_1 DOWN
73 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 73 -1.1009980803280586 -1.3049997165182994
blue_1 False False 73 -0.10100144699472528 -1.3050842893652301
-------------------------- Scene: 0 --------------------------
46 blue_1 DOWN
110 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 110 -1.1010051960529441 -1.3044277763093357
blue_1 False False 110 -0.10100149605294415 -1.3049348362780682
-------------------------- Scene: 0 --------------------------
209 blue_0 DOWN
368 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 368 -0.10100177902930219 -3.473032479370914
blue_1 False False 368 -1.1009999123626355 -3.1249833374144664
-------------------------- Scene: 0 --------------------------
64 blue_0 DOWN
160 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 160 -0.10100048058881612 -1.3050842444117003
blue_1 False False 160 -1.1009971805888161 -2.0398255277604393
-------------------------- Scene: 0 --------------------------
57 blue_0 DOWN
110 blue_1 DOWN
TIME LIMIT LOSE
blue_0 F

2022-03-24 19:23:49,178	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000003\checkpoint-3
2022-03-24 19:23:49,178	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 3, '_timesteps_total': 38090, '_time_total': 2126.5528366565704, '_episodes_total': 68}


agent_timesteps_total: 49724
custom_metrics: {}
date: 2022-03-24_19-23-48
done: false
episode_len_mean: 365.61764705882354
episode_media: {}
episode_reward_max: 125.99880193930804
episode_reward_mean: -7.506521237498893
episode_reward_min: -87.093776308753
episodes_this_iter: 25
episodes_total: 68
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 9.01840591430664
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.5512830018997192
        var_gnorm: 44.40263366699219
        vf_explained_var: 0.5746622085571289
        vf_loss: 0.11240583658218384
  learner_queue:
    size_count: 6
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 49724
  num_steps_sampled: 24862
  num_steps_trained: 380

60 blue_0 DOWN
92 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 92 -0.10100166154551901 -1.3050521254093732
blue_1 False False 92 -1.1010070948788524 -1.3046594775056441
-------------------------- Scene: 0 --------------------------
96 blue_0 DOWN
220 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 220 -0.10100146191940637 -1.4059817353129107
blue_1 False False 220 -1.101003128586073 -2.591694319234032
-------------------------- Scene: 0 --------------------------
47 blue_0 DOWN
158 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 158 -0.10100124848864316 -1.3040071322224986
blue_1 False False 158 -1.1010030818219765 -1.4178980250692161
-------------------------- Scene: 0 --------------------------
47 blue_0 DOWN
458 blue_1 Shoot at red_0 launch distance : 58219.653727587545 True True
479 blue_1 Shoot at red_0 launch distance : 51571.574233976666 True True
536 red_1 Shoot at blue_1
547 red_1 Shoot at blue_1
562 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 562 -0.10099958221270547 -1.30

2022-03-24 19:34:56,462	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000004\checkpoint-4
2022-03-24 19:34:56,462	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 4, '_timesteps_total': 50372, '_time_total': 2708.1191022396088, '_episodes_total': 96}


agent_timesteps_total: 61798
custom_metrics: {}
date: 2022-03-24_19-34-56
done: false
episode_len_mean: 321.8645833333333
episode_media: {}
episode_reward_max: 125.99880193930804
episode_reward_mean: -4.9990428986159055
episode_reward_min: -87.093776308753
episodes_this_iter: 28
episodes_total: 96
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 10.505046844482422
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.999996185302734
        model: {}
        policy_loss: 0.7711125016212463
        var_gnorm: 44.4050178527832
        vf_explained_var: 0.6927337050437927
        vf_loss: 0.3223501145839691
  learner_queue:
    size_count: 10
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 61798
  num_steps_sampled: 30899
  num_ste

102 blue_1 DOWN
110 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 110 -1.1010015494384886 -1.8192224319810961
blue_1 False False 110 -0.10100171610515536 -1.4061458173047772
-------------------------- Scene: 0 --------------------------
98 blue_0 DOWN
183 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 183 -0.10100047033500739 -1.4064060124284872
blue_1 False False 183 -1.1009960703350075 -1.3277227375382534
-------------------------- Scene: 0 --------------------------
42 blue_0 DOWN
168 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 168 -0.10100166002864891 -1.4058820746007104
blue_1 False False 168 -1.100997060028649 -2.1507305253883438
-------------------------- Scene: 0 --------------------------
43 blue_1 DOWN
81 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 81 -1.1010010326450015 -1.30512696110171
blue_1 False False 81 -0.10100069931166814 -1.2039349355461457
-------------------------- Scene: 0 --------------------------
176 blue_0 DOWN
377 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fa

2022-03-24 19:46:26,505	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000005\checkpoint-5
2022-03-24 19:46:26,505	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 5, '_timesteps_total': 62654, '_time_total': 3309.561379671097, '_episodes_total': 120}


agent_timesteps_total: 75032
custom_metrics: {}
date: 2022-03-24_19-46-26
done: false
episode_len_mean: 243.46
episode_media: {}
episode_reward_max: 125.99880193930804
episode_reward_mean: 3.044688368174078
episode_reward_min: -8.709972575516094
episodes_this_iter: 24
episodes_total: 120
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 12.975685119628906
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.24413347244262695
        var_gnorm: 44.41324234008789
        vf_explained_var: 0.7886946201324463
        vf_loss: 0.10627612471580505
  learner_queue:
    size_count: 15
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 75032
  num_steps_sampled: 37516
  num_steps_trained: 62654
  n

140 blue_0 DOWN
409 blue_1 Shoot at red_0 launch distance : 58901.9058724091 True True
424 blue_1 Shoot at red_0 launch distance : 53400.99043766059 True True
490 red_1 Shoot at blue_1
501 red_1 Shoot at blue_1
538 blue_1 Splash :red_0
539 blue_1: Destroyed
TIME LIMIT LOSE
blue_0 False False 539 -0.10100082545543639 -1.4192483862033776
blue_1 False False 539 -0.1398357587887697 116.55867678390416
-------------------------- Scene: 0 --------------------------
86 blue_0 DOWN
91 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 91 -0.10100166377666686 -1.3049742179703836
blue_1 False False 91 -1.100996463776667 -1.305566887641327
-------------------------- Scene: 0 --------------------------
73 blue_0 DOWN
197 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 197 -0.10100166404712803 -1.3052612591534458
blue_1 False False 197 -1.100996864047128 -2.1456508872115236
-------------------------- Scene: 0 --------------------------
62 blue_0 DOWN
187 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 187

2022-03-24 19:56:56,065	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000006\checkpoint-6
2022-03-24 19:56:56,065	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 6, '_timesteps_total': 74936, '_time_total': 3892.3816187381744, '_episodes_total': 147}


agent_timesteps_total: 87318
custom_metrics: {}
date: 2022-03-24_19-56-55
done: false
episode_len_mean: 236.85
episode_media: {}
episode_reward_max: 125.99880193930804
episode_reward_mean: 2.75461849447049
episode_reward_min: -6.598015816785381
episodes_this_iter: 27
episodes_total: 147
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 16.194259643554688
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.20388668775558472
        var_gnorm: 44.43227767944336
        vf_explained_var: 0.8904633522033691
        vf_loss: 0.0939253717660904
  learner_queue:
    size_count: 21
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 87318
  num_steps_sampled: 43659
  num_steps_trained: 74936
  num

115 blue_1 DOWN
159 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 159 -1.1009961967037145 -2.643260442947806
blue_1 False False 159 -0.10100166337038122 -1.3053516497158886
-------------------------- Scene: 0 --------------------------
108 blue_0 DOWN
126 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 126 -0.10100099013935293 -1.2037866574189209
blue_1 False False 126 -1.1010009234726863 -1.4058825093204037
-------------------------- Scene: 0 --------------------------
44 blue_0 DOWN
102 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 102 -0.10100039939065408 -1.3049641272859898
blue_1 False False 102 -1.1009908327239875 -1.5081875579923274
-------------------------- Scene: 0 --------------------------
140 blue_1 DOWN
206 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 206 -1.1010057041070906 -1.4404281673423092
blue_1 False False 206 -0.10100220410709064 -1.3112447837903207
-------------------------- Scene: 0 --------------------------
92 blue_0 DOWN
110 blue_1 DOWN
TIME LIMIT LOSE
blu

2022-03-24 20:08:51,327	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000007\checkpoint-7
2022-03-24 20:08:51,327	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 7, '_timesteps_total': 87024, '_time_total': 4559.7928347587585, '_episodes_total': 184}


agent_timesteps_total: 100438
custom_metrics: {}
date: 2022-03-24_20-08-51
done: false
episode_len_mean: 222.7
episode_media: {}
episode_reward_max: 142.95349984355363
episode_reward_mean: 2.708417674482086
episode_reward_min: -7.385912443943092
episodes_this_iter: 37
episodes_total: 184
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 21.470619201660156
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.22234338521957397
        var_gnorm: 44.46356201171875
        vf_explained_var: 0.9573943614959717
        vf_loss: 0.0675528272986412
  learner_queue:
    size_count: 28
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 100438
  num_steps_sampled: 50219
  num_steps_trained: 87024
  n

46 blue_1 DOWN
132 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 132 -1.1010123075956852 -1.4354995639160777
blue_1 False False 132 -0.10100074092901862 -1.305204886602541
-------------------------- Scene: 0 --------------------------
111 blue_0 DOWN
114 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 114 -0.10100166388647752 -1.3052139425491154
blue_1 False False 114 -1.1010060638864776 -1.3466834678202255
-------------------------- Scene: 0 --------------------------
94 blue_0 DOWN
152 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 152 -0.10100175180480142 -1.306294408740237
blue_1 False False 152 -1.1010004518048013 -1.317973487026625
-------------------------- Scene: 0 --------------------------
114 blue_0 DOWN
160 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 160 -0.10100165430041705 -1.3050791029884041
blue_1 False False 160 -1.100998054300417 -1.307250694329573
-------------------------- Scene: 0 --------------------------
55 blue_1 DOWN
88 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fa

93 blue_0 DOWN
124 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 124 -0.10100085373415978 -1.507314538681663
blue_1 False False 124 -1.101006987067493 -1.4069629852595824
-------------------------- Scene: 0 --------------------------
75 blue_0 DOWN
105 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 105 -0.1010016632566007 -1.305058893736842
blue_1 False False 105 -1.101006596589934 -1.3060113490134033
-------------------------- Scene: 0 --------------------------


2022-03-24 20:20:34,239	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000008\checkpoint-8
2022-03-24 20:20:34,239	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 8, '_timesteps_total': 99112, '_time_total': 5215.6420476436615, '_episodes_total': 224}


agent_timesteps_total: 112774
custom_metrics: {}
date: 2022-03-24_20-20-34
done: false
episode_len_mean: 179.09
episode_media: {}
episode_reward_max: 142.95349984355363
episode_reward_mean: -0.2549807937244053
episode_reward_min: -7.385912443943092
episodes_this_iter: 40
episodes_total: 224
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 26.845733642578125
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.12325318157672882
        var_gnorm: 44.5074462890625
        vf_explained_var: 0.9801400899887085
        vf_loss: 0.05084478110074997
  learner_queue:
    size_count: 36
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 112774
  num_steps_sampled: 56387
  num_steps_trained: 99112


108 blue_1 DOWN
183 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 183 -1.1010032325791037 -3.0582673039549673
blue_1 False False 183 -0.10100059924577028 -1.3050452111568922
-------------------------- Scene: 0 --------------------------
59 blue_1 DOWN
315 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 315 -1.1010041599117144 -3.6817844969241937
blue_1 False False 315 -0.10100165991171432 -1.3050448822711909
-------------------------- Scene: 0 --------------------------
119 blue_0 DOWN
145 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 145 -0.10100083064669199 -1.3051109929361175
blue_1 False False 145 -1.1010106973133587 -1.3052343012072418
-------------------------- Scene: 0 --------------------------
45 blue_1 DOWN
178 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 178 -1.1009983315766785 -2.659559938621068
blue_1 False False 178 -0.10100166491001178 -1.2041945613543883
-------------------------- Scene: 0 --------------------------
68 blue_0 DOWN
242 blue_1 DOWN
TIME LIMIT LOSE
blue

145 blue_0 DOWN
202 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 202 -0.10100165668753762 -1.3073828802998329
blue_1 False False 202 -1.1009982233542044 -1.4328157127566907
-------------------------- Scene: 0 --------------------------
65 blue_0 DOWN
128 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 128 -0.10100122037010234 -1.3049015497426741
blue_1 False False 128 -1.101003587036769 -1.4061345726494685
-------------------------- Scene: 0 --------------------------


2022-03-24 20:33:24,916	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000009\checkpoint-9
2022-03-24 20:33:24,916	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 9, '_timesteps_total': 111200, '_time_total': 5886.222682237625, '_episodes_total': 264}


agent_timesteps_total: 125504
custom_metrics: {}
date: 2022-03-24_20-33-24
done: false
episode_len_mean: 164.43
episode_media: {}
episode_reward_max: 142.95349984355363
episode_reward_mean: -1.831642805146686
episode_reward_min: -7.385912443943092
episodes_this_iter: 40
episodes_total: 264
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 33.82218933105469
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.01168199721723795
        var_gnorm: 44.561073303222656
        vf_explained_var: 0.9223795533180237
        vf_loss: 0.0002621574094519019
  learner_queue:
    size_count: 45
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 125504
  num_steps_sampled: 62752
  num_steps_trained: 111

129 blue_0 DOWN
266 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 266 -0.10100173284336572 -1.423274845093516
blue_1 False False 266 -1.1010054995100325 -2.7879963391609532
-------------------------- Scene: 0 --------------------------
56 blue_0 DOWN
101 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 101 -0.1010007739582852 -1.3049471284575762
blue_1 False False 101 -1.1009882739582852 -1.3051746101853607
-------------------------- Scene: 0 --------------------------
48 blue_0 DOWN
64 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 64 -0.1010018395997789 -1.3049610190618928
blue_1 False False 64 -1.1010096395997788 -1.3059203907163495
-------------------------- Scene: 0 --------------------------
92 blue_0 DOWN
109 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 109 -0.10100172426883322 -1.3049096464219947
blue_1 False False 109 -1.1009985242688332 -1.3406021930610126
-------------------------- Scene: 0 --------------------------
51 blue_0 DOWN
260 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fal

144 blue_1 DOWN
196 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 196 -1.1010010988167394 -2.12305987811081
blue_1 False False 196 -0.10100086548340609 -2.333228095447073
-------------------------- Scene: 0 --------------------------


2022-03-24 20:45:41,832	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000010\checkpoint-10
2022-03-24 20:45:41,832	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': 123288, '_time_total': 6551.376984357834, '_episodes_total': 303}


agent_timesteps_total: 138030
custom_metrics: {}
date: 2022-03-24_20-45-41
done: false
episode_len_mean: 157.93
episode_media: {}
episode_reward_max: -2.4091309468590065
episode_reward_mean: -3.41728343454595
episode_reward_min: -7.672765513868734
episodes_this_iter: 39
episodes_total: 303
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 41.079132080078125
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: -0.01724575273692608
        var_gnorm: 44.61925506591797
        vf_explained_var: 0.8879387974739075
        vf_loss: 0.00023129692999646068
  learner_queue:
    size_count: 55
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 138030
  num_steps_sampled: 69015
  num_ste

44 blue_0 DOWN
107 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 107 -0.10100166403409296 -1.3040028030114574
blue_1 False False 107 -1.1009984307007596 -1.7181516279187936
-------------------------- Scene: 0 --------------------------
216 blue_0 DOWN
237 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 237 -0.10100195097763352 -3.744009785736949
blue_1 False False 237 -1.1009989843109669 -2.044379880269344
-------------------------- Scene: 0 --------------------------
87 blue_0 DOWN
161 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 161 -0.1010020085563348 -1.4061536381814495
blue_1 False False 161 -1.1009998085563348 -1.4197773303919643
-------------------------- Scene: 0 --------------------------
62 blue_0 DOWN
228 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 228 -0.10100169526069112 -1.3050422331125784
blue_1 False False 228 -1.1010026619273579 -2.8655811150194417
-------------------------- Scene: 0 --------------------------
43 blue_0 DOWN
102 blue_1 DOWN
TIME LIMIT LOSE
blue_0 

2022-03-24 20:57:20,471	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000011\checkpoint-11
2022-03-24 20:57:20,471	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 11, '_timesteps_total': 135362, '_time_total': 7175.318736791611, '_episodes_total': 337}


agent_timesteps_total: 150056
custom_metrics: {}
date: 2022-03-24_20-57-20
done: false
episode_len_mean: 167.89
episode_media: {}
episode_reward_max: -2.4091309468590065
episode_reward_mean: -3.576004123179752
episode_reward_min: -7.672765513868734
episodes_this_iter: 34
episodes_total: 337
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 47.47509765625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.0015835389494895935
        var_gnorm: 44.67848205566406
        vf_explained_var: 0.8564549684524536
        vf_loss: 0.0002508815086912364
  learner_queue:
    size_count: 66
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 150056
  num_steps_sampled: 75028
  num_steps_trained: 13536

61 blue_1 DOWN
73 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 73 -1.101007564306242 -1.3052494737532738
blue_1 False False 73 -0.10100186430624206 -1.2043120278269974
-------------------------- Scene: 0 --------------------------
138 blue_0 DOWN
214 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 214 -0.10100197904096743 -1.4133153064746884
blue_1 False False 214 -1.1010001790409674 -5.0843273582884505
-------------------------- Scene: 0 --------------------------
109 blue_0 DOWN
212 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 212 -0.10100171419353343 -1.3130788246874388
blue_1 False False 212 -1.1009975808602002 -2.254265737439334
-------------------------- Scene: 0 --------------------------
166 blue_1 DOWN
171 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 171 -1.1010031699766658 -3.540947192267228
blue_1 False False 171 -0.10100186997666577 -2.503643188651914
-------------------------- Scene: 0 --------------------------
127 blue_0 DOWN
181 blue_1 DOWN
TIME LIMIT LOSE
blue_0 F

171 blue_0 DOWN
348 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 348 -0.10100117136185079 -2.135163617028135
blue_1 False False 348 -1.1010025380285176 -3.977691896991678
-------------------------- Scene: 0 --------------------------
50 blue_1 DOWN
132 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 132 -1.1010011734684728 -1.3051526407337426
blue_1 False False 132 -0.10100087346847275 -1.303700452944497
-------------------------- Scene: 0 --------------------------
89 blue_0 DOWN
115 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 115 -0.1010015803031167 -1.3052270864089666
blue_1 False False 115 -1.1010083136364501 -1.3095564646886875
-------------------------- Scene: 0 --------------------------
107 blue_1 DOWN
108 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 108 -1.101003247349255 -1.4169442541150892
blue_1 False False 108 -0.11314411401592178 -1.4181982311772623
-------------------------- Scene: 0 --------------------------
79 blue_0 DOWN
174 blue_1 DOWN
TIME LIMIT LOSE
blue_0 F

2022-03-24 21:10:15,326	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000012\checkpoint-12
2022-03-24 21:10:15,326	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 12, '_timesteps_total': 147436, '_time_total': 7880.243095636368, '_episodes_total': 381}


agent_timesteps_total: 162910
custom_metrics: {}
date: 2022-03-24_21-10-15
done: false
episode_len_mean: 158.34
episode_media: {}
episode_reward_max: -2.5087335019673898
episode_reward_mean: -3.466977489177254
episode_reward_min: -6.599810294779498
episodes_this_iter: 44
episodes_total: 381
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 52.601318359375
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.00945030152797699
        var_gnorm: 44.73011016845703
        vf_explained_var: 0.7692285776138306
        vf_loss: 0.0003414420934859663
  learner_queue:
    size_count: 78
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 162910
  num_steps_sampled: 81455
  num_steps_trained: 147436

84 blue_0 DOWN
95 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 95 -0.10100064565374842 -1.4057440709989584
blue_1 False False 95 -1.1010049456537485 -1.3356054347619644
-------------------------- Scene: 0 --------------------------
103 blue_1 DOWN
116 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 116 -1.1010016049716094 -1.304967106651758
blue_1 False False 116 -0.10100143830494275 -1.3040611398864048
-------------------------- Scene: 0 --------------------------
63 blue_0 DOWN
177 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 177 -0.10100147210783353 -1.2038413871758937
blue_1 False False 177 -1.101004305441167 -1.9322194222950637
-------------------------- Scene: 0 --------------------------
59 blue_0 DOWN
208 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 208 -0.10100166477371184 -1.709146260142275
blue_1 False False 208 -1.1010028647737118 -1.760458967947884
-------------------------- Scene: 0 --------------------------
141 blue_0 DOWN
203 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fal

45 blue_0 DOWN
78 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 78 -0.10100166332309707 -1.2040658314436166
blue_1 False False 78 -1.101002163323097 -1.3053806939017942
-------------------------- Scene: 0 --------------------------
91 blue_0 DOWN
149 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 149 -0.10100147017038467 -1.3060420904921397
blue_1 False False 149 -1.100995203503718 -1.3052495149909993
-------------------------- Scene: 0 --------------------------
81 blue_0 DOWN
81 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 81 -1.1009947310284205 -1.3048421857664378
blue_1 False False 81 -1.101003564361754 -1.3052147190997714
-------------------------- Scene: 0 --------------------------
61 blue_0 DOWN
110 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 110 -0.10100083518575949 -1.3050289937449184
blue_1 False False 110 -1.1010159018524261 -1.6086392426667908
-------------------------- Scene: 0 --------------------------
42 blue_1 DOWN
158 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False Fa

2022-03-24 21:22:59,839	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000013\checkpoint-13
2022-03-24 21:22:59,839	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 13, '_timesteps_total': 159510, '_time_total': 8577.155840873718, '_episodes_total': 425}


agent_timesteps_total: 175626
custom_metrics: {}
date: 2022-03-24_21-22-59
done: false
episode_len_mean: 148.17
episode_media: {}
episode_reward_max: -2.5087335019673898
episode_reward_mean: -3.2781912983162336
episode_reward_min: -6.599810294779498
episodes_this_iter: 44
episodes_total: 425
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 57.3355712890625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.002582433633506298
        var_gnorm: 44.774784088134766
        vf_explained_var: 0.570044755935669
        vf_loss: 0.0007407436496578157
  learner_queue:
    size_count: 91
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 175626
  num_steps_sampled: 87813
  num_steps_trained: 15

133 blue_1 DOWN
214 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 214 -1.1010008966318723 -2.8400373011747515
blue_1 False False 214 -0.10100166329853885 -1.3043404911882273
-------------------------- Scene: 0 --------------------------
83 blue_1 DOWN
154 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 154 -1.1010024108781664 -1.317879357128347
blue_1 False False 154 -0.1010014775448332 -1.3060900263504258
-------------------------- Scene: 0 --------------------------
56 blue_0 DOWN
75 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 75 -0.10100168944488253 -1.3050081917781737
blue_1 False False 75 -1.101002222778216 -1.3052777946611878
-------------------------- Scene: 0 --------------------------
71 blue_1 DOWN
89 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 89 -1.1010090468604492 -1.6093654039333543
blue_1 False False 89 -0.10100188019378259 -1.304103242539067
-------------------------- Scene: 0 --------------------------
111 blue_0 DOWN
118 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False 

82 blue_0 DOWN
95 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 95 -0.10100058254431016 -1.4067888890876925
blue_1 False False 95 -1.1010166825443102 -1.609121128270342
-------------------------- Scene: 0 --------------------------


2022-03-24 21:35:03,404	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000014\checkpoint-14
2022-03-24 21:35:03,420	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 14, '_timesteps_total': 171584, '_time_total': 9232.513207674026, '_episodes_total': 464}


agent_timesteps_total: 187744
custom_metrics: {}
date: 2022-03-24_21-35-03
done: false
episode_len_mean: 151.34
episode_media: {}
episode_reward_max: -2.5089515048584907
episode_reward_mean: -3.325595031058955
episode_reward_min: -7.884123521849622
episodes_this_iter: 39
episodes_total: 464
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 61.793540954589844
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.011403337121009827
        var_gnorm: 44.813480377197266
        vf_explained_var: 0.4996308982372284
        vf_loss: 0.00039731949800625443
  learner_queue:
    size_count: 105
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 187744
  num_steps_sampled: 93872
  num_steps_trained

48 blue_1 DOWN
50 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 50 -1.101001829936882 -1.3049166475301586
blue_1 False False 50 -0.10100166327021544 -1.3046316292387936
-------------------------- Scene: 0 --------------------------
42 blue_1 DOWN
44 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 44 -1.1009988232755212 -1.304910050400814
blue_1 False False 44 -0.10100135660885455 -1.3040368051777755
-------------------------- Scene: 0 --------------------------
56 blue_0 DOWN
82 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 82 -0.10100166368790005 -1.3050199177691313
blue_1 False False 82 -1.1009986303545667 -1.616836597890279
-------------------------- Scene: 0 --------------------------
96 blue_0 DOWN
165 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 165 -0.10100136990083568 -1.4122020889037359
blue_1 False False 165 -1.1010027365675024 -2.529116406847695
-------------------------- Scene: 0 --------------------------
112 blue_0 DOWN
385 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fals

2022-03-24 21:46:52,275	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000015\checkpoint-15
2022-03-24 21:46:52,275	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 15, '_timesteps_total': 183658, '_time_total': 9879.164543390274, '_episodes_total': 500}


agent_timesteps_total: 200012
custom_metrics: {}
date: 2022-03-24_21-46-52
done: false
episode_len_mean: 158.36
episode_media: {}
episode_reward_max: -2.509219252684626
episode_reward_mean: -3.3799108559254245
episode_reward_min: -10.414543524098258
episodes_this_iter: 36
episodes_total: 500
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 66.62605285644531
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.00995430164039135
        var_gnorm: 44.84733581542969
        vf_explained_var: 0.2712657153606415
        vf_loss: 0.00035623848089016974
  learner_queue:
    size_count: 120
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 200012
  num_steps_sampled: 100006
  num_steps_trained: 

54 blue_1 DOWN
150 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 150 -1.1009999422541281 -1.3280091777097849
blue_1 False False 150 -0.10100194225412812 -1.3047968896455122
-------------------------- Scene: 0 --------------------------
80 blue_1 DOWN
126 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 126 -1.1009981278567904 -1.416963475531668
blue_1 False False 126 -0.10100212785679044 -1.3214224813226076
-------------------------- Scene: 0 --------------------------
66 blue_0 DOWN
88 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 88 -0.10100137328268438 -1.2040504000059151
blue_1 False False 88 -1.1009993066160177 -1.3054314132767084
-------------------------- Scene: 0 --------------------------
81 blue_0 DOWN
231 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 231 -0.10100202198913291 -1.3057721620207279
blue_1 False False 231 -1.1010021219891328 -1.3289378204364124
-------------------------- Scene: 0 --------------------------
119 blue_1 DOWN
146 blue_0 DOWN
TIME LIMIT LOSE
blue_0 F

74 blue_0 DOWN
150 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 150 -0.10100109873156647 -1.4071721559787616
blue_1 False False 150 -1.1010094987315664 -3.143471501021578
-------------------------- Scene: 0 --------------------------
47 blue_1 DOWN
328 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 328 -1.1010021869659976 -2.4713199931852468
blue_1 False False 328 -0.1010018869659976 -1.303737157600158
-------------------------- Scene: 0 --------------------------
98 blue_1 DOWN
282 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 282 -1.1010034311349033 -2.0784567465472
blue_1 False False 282 -0.10100189780156987 -1.205418202346486
-------------------------- Scene: 0 --------------------------
58 blue_0 DOWN
70 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 70 -0.10100166425601192 -1.2040552157865896
blue_1 False False 70 -1.1010073309226787 -1.3050047555411621
-------------------------- Scene: 0 --------------------------


2022-03-24 21:58:36,612	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000016\checkpoint-16
2022-03-24 21:58:36,612	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 16, '_timesteps_total': 196892, '_time_total': 10546.626066207886, '_episodes_total': 542}


agent_timesteps_total: 212200
custom_metrics: {}
date: 2022-03-24_21-58-36
done: false
episode_len_mean: 153.61
episode_media: {}
episode_reward_max: -2.5086570979041194
episode_reward_mean: -3.335511488388931
episode_reward_min: -10.414543524098258
episodes_this_iter: 42
episodes_total: 542
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 71.56491088867188
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: 0.0012604622170329094
        var_gnorm: 44.88060760498047
        vf_explained_var: 0.20363569259643555
        vf_loss: 0.00036059971898794174
  learner_queue:
    size_count: 136
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 212200
  num_steps_sampled: 106100
  num

62 blue_0 DOWN
64 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 64 -0.1010021272329649 -1.3060440373149786
blue_1 False False 64 -1.1010049605662982 -1.5079878601774126
-------------------------- Scene: 0 --------------------------
100 blue_1 DOWN
138 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 138 -1.1010037879582928 -1.7182041397069523
blue_1 False False 138 -0.10100202129162601 -1.4062459963569318
-------------------------- Scene: 0 --------------------------
68 blue_1 DOWN
122 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 122 -1.1009994646489891 -1.3050173504444746
blue_1 False False 122 -0.10100166464898908 -1.306067878572478
-------------------------- Scene: 0 --------------------------
130 blue_0 DOWN
175 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 175 -0.10100045738785039 -1.3062014058981657
blue_1 False False 175 -1.100999024054517 -1.393667360761567
-------------------------- Scene: 0 --------------------------
71 blue_0 DOWN
98 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fals

76 blue_0 DOWN
108 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 108 -0.10100152984862464 -1.3051332890515799
blue_1 False False 108 -1.1010043298486247 -1.4130402136431828
-------------------------- Scene: 0 --------------------------
111 blue_1 DOWN
257 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 257 -1.1010047312111935 -2.275237071138866
blue_1 False False 257 -0.10100173121119335 -1.4060883100093247
-------------------------- Scene: 0 --------------------------
56 blue_0 DOWN
130 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 130 -0.10100143997217792 -1.4068542482606743
blue_1 False False 130 -1.1010050733055112 -1.3061995800562154
-------------------------- Scene: 0 --------------------------
50 blue_0 DOWN
193 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 193 -0.1010012728809928 -1.3049536941824145
blue_1 False False 193 -1.100998806214326 -1.406799577825148
-------------------------- Scene: 0 --------------------------


2022-03-24 22:10:37,712	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000017\checkpoint-17
2022-03-24 22:10:37,712	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 17, '_timesteps_total': 210126, '_time_total': 11207.12483215332, '_episodes_total': 584}


agent_timesteps_total: 224274
custom_metrics: {}
date: 2022-03-24_22-10-37
done: false
episode_len_mean: 151.61
episode_media: {}
episode_reward_max: -2.5086570979041194
episode_reward_mean: -3.185951286543985
episode_reward_min: -7.034554140559365
episodes_this_iter: 42
episodes_total: 584
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 76.51488494873047
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.02609047293663025
        var_gnorm: 44.91399002075195
        vf_explained_var: 0.12575790286064148
        vf_loss: 0.0002738733310252428
  learner_queue:
    size_count: 153
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 224274
  num_steps_sampled: 112137
  num_steps_trained: 

44 blue_0 DOWN
85 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 85 -0.10100104704327177 -1.3040504654729699
blue_1 False False 85 -1.1010043137099383 -1.305066679080466
-------------------------- Scene: 0 --------------------------
297 blue_1 DOWN
304 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 304 -1.1009975386278237 -2.435340479866204
blue_1 False False 304 -0.1010019719611569 -3.048549059090302
-------------------------- Scene: 0 --------------------------
51 blue_1 DOWN
363 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 363 -1.1009977055883133 -2.4380588535046845
blue_1 False False 363 -0.10100107225498009 -1.4070296683070451
-------------------------- Scene: 0 --------------------------
80 blue_1 DOWN
175 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 175 -1.101002922174048 -1.4597271361347113
blue_1 False False 175 -0.10100198884071464 -1.3048979197025845
-------------------------- Scene: 0 --------------------------
47 blue_1 DOWN
69 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False 

71 blue_1 DOWN
141 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 141 -1.100997053321554 -2.532501266829175
blue_1 False False 141 -0.10100258665488748 -1.3053796830566382
-------------------------- Scene: 0 --------------------------
90 blue_1 DOWN
280 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 280 -1.1010060349348598 -1.3114912262077452
blue_1 False False 280 -0.10100096826819316 -1.3052848737120617
-------------------------- Scene: 0 --------------------------


2022-03-24 22:23:02,144	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000018\checkpoint-18
2022-03-24 22:23:02,144	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 18, '_timesteps_total': 223360, '_time_total': 11891.44254207611, '_episodes_total': 624}


agent_timesteps_total: 237212
custom_metrics: {}
date: 2022-03-24_22-23-01
done: false
episode_len_mean: 153.93
episode_media: {}
episode_reward_max: 2.4547854289931212
episode_reward_mean: -3.0950563582908353
episode_reward_min: -7.034554140559365
episodes_this_iter: 40
episodes_total: 624
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 81.81954193115234
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.008001763373613358
        var_gnorm: 44.946929931640625
        vf_explained_var: 0.43103593587875366
        vf_loss: 9.025270992424339e-05
  learner_queue:
    size_count: 171
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 237212
  num_steps_sampled: 118606
  num_steps_trained

104 blue_0 DOWN
138 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 138 -0.10100105945967769 -1.3050592661619729
blue_1 False False 138 -1.1009942594596778 -1.5191288416382478
-------------------------- Scene: 0 --------------------------
76 blue_0 DOWN
157 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 157 -0.10100141948086229 -1.3047607645826726
blue_1 False False 157 -1.1010012194808623 -1.2053650496509491
-------------------------- Scene: 0 --------------------------
72 blue_1 DOWN
213 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 213 -1.1009987887114596 -2.636274340496602
blue_1 False False 213 -0.10100125537812635 -1.3048805769941092
-------------------------- Scene: 0 --------------------------
85 blue_1 DOWN
141 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 141 -1.101002364297995 -1.304991308178118
blue_1 False False 141 -0.1010016642979949 -1.3053453105165636
-------------------------- Scene: 0 --------------------------
74 blue_0 DOWN
77 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fa

61 blue_0 DOWN
211 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 211 -0.10100149472133724 -1.3051091430023876
blue_1 False False 211 -1.1010021947213373 -2.8418142761681997
-------------------------- Scene: 0 --------------------------
101 blue_1 DOWN
104 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 104 -1.1009961065986298 -1.3041089393764567
blue_1 False False 104 -0.10100177326529641 -1.305838552784354
-------------------------- Scene: 0 --------------------------
95 blue_0 DOWN
376 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 376 -0.10100160347921748 -2.4228513686209947
blue_1 False False 376 -1.1010008034792176 -5.043440339756535
-------------------------- Scene: 0 --------------------------
92 blue_1 DOWN
111 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 111 -1.1010080635630002 -1.3047899286582927
blue_1 False False 111 -0.10100213022966695 -1.339599554713775
-------------------------- Scene: 0 --------------------------
168 blue_1 DOWN
442 blue_0 DOWN
TIME LIMIT LOSE
blue_0

2022-03-24 22:35:55,396	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000019\checkpoint-19
2022-03-24 22:35:55,396	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 19, '_timesteps_total': 236594, '_time_total': 12588.993654251099, '_episodes_total': 667}


agent_timesteps_total: 250118
custom_metrics: {}
date: 2022-03-24_22-35-55
done: false
episode_len_mean: 154.26
episode_media: {}
episode_reward_max: 2.4547854289931212
episode_reward_mean: -3.0868591958511242
episode_reward_min: -7.46629170837752
episodes_this_iter: 43
episodes_total: 667
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 87.29395294189453
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.0006282376125454903
        var_gnorm: 44.98329544067383
        vf_explained_var: 0.09928816556930542
        vf_loss: 0.00016397732542827725
  learner_queue:
    size_count: 190
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 250118
  num_steps_sampled: 125059
  num_steps_trained:

50 blue_1 DOWN
112 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 112 -1.1010084815723056 -1.4062872217440563
blue_1 False False 112 -0.10100134823897233 -1.3043492911934083
-------------------------- Scene: 0 --------------------------
84 blue_0 DOWN
143 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 143 -0.10100166444193995 -1.3053077991115611
blue_1 False False 143 -1.10100196444194 -1.4062889222271948
-------------------------- Scene: 0 --------------------------
107 blue_1 DOWN
121 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 121 -1.1010076987448523 -1.4064337212721236
blue_1 False False 121 -0.1010015987448522 -1.7181571160936764
-------------------------- Scene: 0 --------------------------
75 blue_0 DOWN
141 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 141 -0.10100084039395586 -1.3057559257170726
blue_1 False False 141 -1.1010047070606226 -1.4427363704816185
-------------------------- Scene: 0 --------------------------
82 blue_1 DOWN
629 blue_0 Shoot at red_0 launch distan

67 blue_0 DOWN
86 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 86 -0.10100166403843622 -1.306112658812199
blue_1 False False 86 -1.1010074640384362 -1.507070583728349
-------------------------- Scene: 0 --------------------------
113 blue_1 DOWN
189 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 189 -1.1010070971263584 -1.4138607293365841
blue_1 False False 189 -0.10100166379302501 -1.3051269690188418
-------------------------- Scene: 0 --------------------------


2022-03-24 22:48:16,186	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000020\checkpoint-20
2022-03-24 22:48:16,186	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 20, '_timesteps_total': 249828, '_time_total': 13254.553140640259, '_episodes_total': 707}


agent_timesteps_total: 262562
custom_metrics: {}
date: 2022-03-24_22-48-15
done: false
episode_len_mean: 156.71
episode_media: {}
episode_reward_max: 2.4547854289931212
episode_reward_mean: -3.068996895521845
episode_reward_min: -7.46629170837752
episodes_this_iter: 40
episodes_total: 707
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 92.8756103515625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: 0.011260329745709896
        var_gnorm: 45.02030944824219
        vf_explained_var: 0.24516230821609497
        vf_loss: 9.838679397944361e-05
  learner_queue:
    size_count: 210
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 262562
  num_steps_sampled: 131281
  num_steps

48 blue_0 DOWN
81 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 81 -0.10100173968759601 -1.3050440022087157
blue_1 False False 81 -1.1010083730209295 -1.3051982117132317
-------------------------- Scene: 0 --------------------------
120 blue_1 DOWN
188 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 188 -1.1010034843142764 -1.3200902305820041
blue_1 False False 188 -0.1010021843142764 -1.9205117697676093
-------------------------- Scene: 0 --------------------------
133 blue_1 DOWN
163 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 163 -1.1009929796135551 -1.417365757231082
blue_1 False False 163 -0.10100184628022175 -1.3053409126941355
-------------------------- Scene: 0 --------------------------
55 blue_1 DOWN
330 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 330 -1.101001728374145 -2.7055452598575798
blue_1 False False 330 -0.10100162837414486 -1.3048768524731824
-------------------------- Scene: 0 --------------------------
121 blue_1 DOWN
268 blue_0 DOWN
TIME LIMIT LOSE
blue_0 F

75 blue_0 DOWN
102 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 102 -0.10100131412688125 -1.4061763166939427
blue_1 False False 102 -1.1009951141268812 -1.305126718640774
-------------------------- Scene: 0 --------------------------
44 blue_0 DOWN
90 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 90 -0.10100160629381363 -1.203920146540226
blue_1 False False 90 -1.1010062729604804 -1.3051581461598158
-------------------------- Scene: 0 --------------------------


2022-03-24 23:00:35,519	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000021\checkpoint-21
2022-03-24 23:00:35,519	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 21, '_timesteps_total': 263062, '_time_total': 13924.501874446869, '_episodes_total': 747}


agent_timesteps_total: 275114
custom_metrics: {}
date: 2022-03-24_23-00-35
done: false
episode_len_mean: 160.5
episode_media: {}
episode_reward_max: 1.0748782531860759
episode_reward_mean: -3.181919746584538
episode_reward_min: -7.46629170837752
episodes_this_iter: 40
episodes_total: 747
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 98.48529052734375
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: 0.012934371829032898
        var_gnorm: 45.0570068359375
        vf_explained_var: 0.04877614974975586
        vf_loss: 0.0001828656968427822
  learner_queue:
    size_count: 230
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 275114
  num_steps_sampled: 137557
  num_steps_

52 blue_0 DOWN
117 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 117 -0.10100215606778165 -1.4060541226048278
blue_1 False False 117 -1.1010015560677817 -2.219418971890593
-------------------------- Scene: 0 --------------------------
75 blue_0 DOWN
223 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 223 -0.10100215838569786 -1.406876161564594
blue_1 False False 223 -1.101002891719031 -1.9472450625156603
-------------------------- Scene: 0 --------------------------
70 blue_1 DOWN
82 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 82 -1.1010094064163711 -1.4064070501371235
blue_1 False False 82 -0.10100180641637121 -1.3052817852810539
-------------------------- Scene: 0 --------------------------
57 blue_1 DOWN
68 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 68 -1.101005014086347 -1.3048190726920668
blue_1 False False 68 -0.10100181408634698 -1.3052726009111366
-------------------------- Scene: 0 --------------------------
98 blue_1 DOWN
403 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False Fa

2022-03-24 23:12:31,468	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000022\checkpoint-22
2022-03-24 23:12:31,468	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 22, '_timesteps_total': 275348, '_time_total': 14567.606514692307, '_episodes_total': 781}


agent_timesteps_total: 287652
custom_metrics: {}
date: 2022-03-24_23-12-31
done: false
episode_len_mean: 167.09
episode_media: {}
episode_reward_max: 1.0748782531860759
episode_reward_mean: -3.2399173898301803
episode_reward_min: -7.392051522840614
episodes_this_iter: 34
episodes_total: 781
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 103.97892761230469
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: 0.001039870665408671
        var_gnorm: 45.094818115234375
        vf_explained_var: 0.15128743648529053
        vf_loss: 0.00024908798513934016
  learner_queue:
    size_count: 250
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 287652
  num_steps_sampled: 143826
  nu

85 blue_0 DOWN
237 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 237 -0.10100112360355072 -1.3050564952255344
blue_1 False False 237 -1.1009999236035508 -1.9764870539428185
-------------------------- Scene: 0 --------------------------
49 blue_1 DOWN
454 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 454 -1.1009911260194976 -4.9011448348785756
blue_1 False False 454 -0.10100135935283083 -1.3053818618416195
-------------------------- Scene: 0 --------------------------
154 blue_0 DOWN
220 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 220 -0.10100125700169085 -3.6400420438267296
blue_1 False False 220 -1.101003490335024 -3.559684286949064
-------------------------- Scene: 0 --------------------------
96 blue_1 DOWN
149 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 149 -1.1010084643318572 -1.4072588858357127
blue_1 False False 149 -0.10100166433185716 -1.303612916413251
-------------------------- Scene: 0 --------------------------
99 blue_0 DOWN
171 blue_1 DOWN
TIME LIMIT LOSE
blue_0 

2022-03-24 23:23:33,882	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000023\checkpoint-23
2022-03-24 23:23:33,882	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 23, '_timesteps_total': 287634, '_time_total': 15216.896839380264, '_episodes_total': 815}


agent_timesteps_total: 300272
custom_metrics: {}
date: 2022-03-24_23-23-33
done: false
episode_len_mean: 175.64
episode_media: {}
episode_reward_max: 3.034952270707281
episode_reward_mean: -3.4173303742381442
episode_reward_min: -7.726193520522438
episodes_this_iter: 34
episodes_total: 815
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 109.36186218261719
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.011420035734772682
        var_gnorm: 45.134857177734375
        vf_explained_var: 0.10183066129684448
        vf_loss: 0.00016547772975172848
  learner_queue:
    size_count: 270
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 300272
  num_steps_sampled: 150136
  num_steps_traine

98 blue_0 DOWN
103 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 103 -0.10100183095233763 -1.305346724197272
blue_1 False False 103 -1.101011864285671 -1.506979115774014
-------------------------- Scene: 0 --------------------------
74 blue_0 DOWN
146 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 146 -0.10100166157005178 -1.3052208964955712
blue_1 False False 146 -1.1009978282367185 -1.3338130031888427
-------------------------- Scene: 0 --------------------------
123 blue_0 DOWN
207 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 207 -0.10100166390012912 -1.3043355397235334
blue_1 False False 207 -1.101007463900129 -2.3461604098376565
-------------------------- Scene: 0 --------------------------
108 blue_1 DOWN
216 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 216 -1.1009995642926238 -1.832362694756636
blue_1 False False 216 -0.10100166429262385 -1.6081539228251691
-------------------------- Scene: 0 --------------------------
90 blue_0 DOWN
140 blue_1 DOWN
TIME LIMIT LOSE
blue_0 F

2022-03-24 23:35:09,944	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000024\checkpoint-24
2022-03-24 23:35:09,944	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 24, '_timesteps_total': 299920, '_time_total': 15843.429933547974, '_episodes_total': 849}


agent_timesteps_total: 312604
custom_metrics: {}
date: 2022-03-24_23-35-09
done: false
episode_len_mean: 183.29
episode_media: {}
episode_reward_max: 3.034952270707281
episode_reward_mean: -3.4872511932999726
episode_reward_min: -7.726193520522438
episodes_this_iter: 34
episodes_total: 849
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 114.63813781738281
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.999996185302734
        model: {}
        policy_loss: 0.010120293125510216
        var_gnorm: 45.17226028442383
        vf_explained_var: 0.16979753971099854
        vf_loss: 9.47157823247835e-05
  learner_queue:
    size_count: 290
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 312604
  num_steps_sampled: 156302
  num_st

115 blue_0 DOWN
143 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 143 -0.10100166448420723 -1.5189261773524234
blue_1 False False 143 -1.1010041311508738 -1.3244686780915755
-------------------------- Scene: 0 --------------------------
48 blue_1 DOWN
120 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 120 -1.1010065070600374 -2.625074463127867
blue_1 False False 120 -0.10100094039337085 -1.304732974897317
-------------------------- Scene: 0 --------------------------
90 blue_0 DOWN
113 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 113 -0.10100213022576737 -1.4060177670314231
blue_1 False False 113 -1.1010055635591007 -1.304043957085956
-------------------------- Scene: 0 --------------------------
84 blue_1 DOWN
213 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 213 -1.1010066548197488 -3.566104184613458
blue_1 False False 213 -0.10100168815308219 -1.4346337272878993
-------------------------- Scene: 0 --------------------------
93 blue_0 DOWN
109 blue_1 DOWN
TIME LIMIT LOSE
blue_0 F

252 blue_1 DOWN
265 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 265 -1.1009998131629781 -3.8742234420481694
blue_1 False False 265 -0.10100131316297813 -4.00962574146709
-------------------------- Scene: 0 --------------------------
168 blue_1 DOWN
351 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 351 -1.1010031036063748 -3.362234511328573
blue_1 False False 351 -0.10100083693970817 -1.3049464067642984
-------------------------- Scene: 0 --------------------------


2022-03-24 23:47:37,446	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000025\checkpoint-25
2022-03-24 23:47:37,446	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 25, '_timesteps_total': 312206, '_time_total': 16522.504568099976, '_episodes_total': 889}


agent_timesteps_total: 325502
custom_metrics: {}
date: 2022-03-24_23-47-37
done: false
episode_len_mean: 177.71
episode_media: {}
episode_reward_max: 3.034952270707281
episode_reward_mean: -3.482367572397964
episode_reward_min: -10.114241060157608
episodes_this_iter: 40
episodes_total: 889
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 119.87520599365234
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: -0.00916097592562437
        var_gnorm: 45.209556579589844
        vf_explained_var: 0.2222355306148529
        vf_loss: 4.834221908822656e-05
  learner_queue:
    size_count: 310
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 325502
  num_steps_sampled: 162751
  num_s

105 blue_0 DOWN
185 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 185 -0.10100117885657918 -1.5081097083422932
blue_1 False False 185 -1.1009977121899126 -2.7540783203777366
-------------------------- Scene: 0 --------------------------
46 blue_0 DOWN
60 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 60 -0.10100159767656963 -1.3038649383659429
blue_1 False False 60 -1.100997531009903 -1.3050063310589737
-------------------------- Scene: 0 --------------------------
49 blue_0 DOWN
120 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 120 -0.10100182026375838 -1.3049941753350873
blue_1 False False 120 -1.1010116202637583 -1.3051489912853396
-------------------------- Scene: 0 --------------------------
53 blue_1 DOWN
78 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 78 -1.1010043315267066 -1.3052148168484674
blue_1 False False 78 -0.10100166486004003 -1.407069846137708
-------------------------- Scene: 0 --------------------------
71 blue_0 DOWN
82 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False 

115 blue_1 DOWN
154 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 154 -1.101002085169081 -1.8431527350029562
blue_1 False False 154 -0.10100191850241436 -1.305090096286106
-------------------------- Scene: 0 --------------------------
111 blue_1 DOWN
154 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 154 -1.1010000281625756 -2.324080678580776
blue_1 False False 154 -0.10100216149590893 -1.4058445439977847
-------------------------- Scene: 0 --------------------------


2022-03-24 23:59:35,556	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000026\checkpoint-26
2022-03-24 23:59:35,556	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 26, '_timesteps_total': 324492, '_time_total': 17169.295637130737, '_episodes_total': 929}


agent_timesteps_total: 337630
custom_metrics: {}
date: 2022-03-24_23-59-35
done: false
episode_len_mean: 159.56
episode_media: {}
episode_reward_max: -2.5084183609291664
episode_reward_mean: -3.393938370115693
episode_reward_min: -10.114241060157608
episodes_this_iter: 40
episodes_total: 929
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 125.13308715820312
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: -0.0018676826730370522
        var_gnorm: 45.251014709472656
        vf_explained_var: 0.2723993957042694
        vf_loss: 0.00011314314906485379
  learner_queue:
    size_count: 330
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 337630
  num_steps_sampled: 168815
  n

145 blue_0 DOWN
168 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 168 -0.10100148976961805 -1.3054787425292842
blue_1 False False 168 -1.1010008564362848 -2.6850321609534156
-------------------------- Scene: 0 --------------------------
50 blue_0 DOWN
103 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 103 -0.10100164816558052 -1.3060875380048869
blue_1 False False 103 -1.101000281498914 -1.3130229957317523
-------------------------- Scene: 0 --------------------------
77 blue_1 DOWN
126 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 126 -1.1010000228714114 -1.4068316333452797
blue_1 False False 126 -0.10100165620474483 -1.3050920555102512
-------------------------- Scene: 0 --------------------------
71 blue_0 DOWN
124 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 124 -0.10100208840201899 -1.3040885290490747
blue_1 False False 124 -1.1010036217353523 -1.305632508431554
-------------------------- Scene: 0 --------------------------
75 blue_0 DOWN
124 blue_1 DOWN
TIME LIMIT LOSE
blue_0

2022-03-25 00:11:27,320	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000027\checkpoint-27
2022-03-25 00:11:27,320	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 27, '_timesteps_total': 336778, '_time_total': 17813.153364896774, '_episodes_total': 967}


agent_timesteps_total: 350014
custom_metrics: {}
date: 2022-03-25_00-11-27
done: false
episode_len_mean: 154.61
episode_media: {}
episode_reward_max: 130.62034139970385
episode_reward_mean: -1.8648475057149059
episode_reward_min: -7.883849183515256
episodes_this_iter: 38
episodes_total: 967
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 130.39634704589844
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: 0.01106201857328415
        var_gnorm: 45.294410705566406
        vf_explained_var: 0.10325908660888672
        vf_loss: 0.00015576672740280628
  learner_queue:
    size_count: 350
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 350014
  num_steps_sampled: 175007
  num

67 blue_1 DOWN
101 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 101 -1.1010015880665351 -1.3048776209521333
blue_1 False False 101 -0.10100185473320189 -1.3040732264246115
-------------------------- Scene: 0 --------------------------
133 blue_0 DOWN
214 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 214 -0.10100166379930917 -1.4057708830557978
blue_1 False False 214 -1.1009984971326425 -2.460355582754076
-------------------------- Scene: 0 --------------------------
48 blue_0 DOWN
115 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 115 -0.1010008407384961 -1.2039644881899207
blue_1 False False 115 -1.1009986074051628 -1.352569646392195
-------------------------- Scene: 0 --------------------------
117 blue_1 DOWN
129 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 129 -1.1010154714608618 -1.3053952757162008
blue_1 False False 129 -0.10100150479419515 -1.3043958040375807
-------------------------- Scene: 0 --------------------------
43 blue_0 DOWN
57 blue_1 DOWN
TIME LIMIT LOSE
blue_0 

2022-03-25 00:23:19,091	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000028\checkpoint-28
2022-03-25 00:23:19,106	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 28, '_timesteps_total': 349064, '_time_total': 18455.38035583496, '_episodes_total': 1005}


agent_timesteps_total: 362208
custom_metrics: {}
date: 2022-03-25_00-23-18
done: false
episode_len_mean: 160.06
episode_media: {}
episode_reward_max: 130.62034139970385
episode_reward_mean: -1.3004351946308033
episode_reward_min: -6.77914685226739
episodes_this_iter: 38
episodes_total: 1005
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 133.59580993652344
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.00000762939453
        model: {}
        policy_loss: 0.353166401386261
        var_gnorm: 45.307640075683594
        vf_explained_var: 0.1879192590713501
        vf_loss: 0.026442069560289383
  learner_queue:
    size_count: 370
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 362208
  num_steps_sampled: 181104
  num_steps

44 blue_1 DOWN
90 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 90 -1.1009974063959396 -1.3060669202774284
blue_1 False False 90 -0.10100060639593968 -1.4060823411954044
-------------------------- Scene: 0 --------------------------
65 blue_0 DOWN
174 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 174 -0.10100166512485323 -1.5077865519180442
blue_1 False False 174 -1.10100023179152 -1.5777258066066628
-------------------------- Scene: 0 --------------------------
70 blue_0 DOWN
229 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 229 -0.10100243333424082 -1.3051017564908534
blue_1 False False 229 -1.1010040000009074 -2.3572399861041387
-------------------------- Scene: 0 --------------------------
102 blue_0 DOWN
288 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 288 -0.10100159794883007 -1.3053176369009694
blue_1 False False 288 -1.10100209794883 -3.874933061199515
-------------------------- Scene: 0 --------------------------
90 blue_0 DOWN
235 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False

2022-03-25 00:35:31,912	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000029\checkpoint-29
2022-03-25 00:35:31,912	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 29, '_timesteps_total': 362184, '_time_total': 19118.00942850113, '_episodes_total': 1043}


agent_timesteps_total: 374840
custom_metrics: {}
date: 2022-03-25_00-35-31
done: false
episode_len_mean: 165.95
episode_media: {}
episode_reward_max: 130.62034139970385
episode_reward_mean: -1.374139123476661
episode_reward_min: -6.77914685226739
episodes_this_iter: 38
episodes_total: 1043
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 137.50006103515625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.2365644872188568
        var_gnorm: 45.305572509765625
        vf_explained_var: 0.36739006638526917
        vf_loss: 0.008488242514431477
  learner_queue:
    size_count: 390
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 374840
  num_steps_sampled: 187420
  num_steps_trained: 36

93 blue_0 DOWN
111 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 111 -0.1010016636251443 -1.4057829587804989
blue_1 False False 111 -1.101002930291811 -1.305179015193145
-------------------------- Scene: 0 --------------------------
46 blue_0 DOWN
113 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 113 -0.10100106481290604 -1.3039243284941346
blue_1 False False 113 -1.1009948981462394 -1.919861913639724
-------------------------- Scene: 0 --------------------------
61 blue_0 DOWN
551 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 551 -0.10100164663490233 -1.3050840653987037
blue_1 False False 551 -0.7049963133015689 7.157120637646799
-------------------------- Scene: 0 --------------------------
76 blue_0 DOWN
76 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 76 -1.1009993739297514 -1.3050631463801694
blue_1 False False 76 -1.101000240596418 -1.4070433797135027
-------------------------- Scene: 0 --------------------------
62 blue_0 DOWN
92 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fals

2022-03-25 00:47:19,706	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000030\checkpoint-30
2022-03-25 00:47:19,706	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 30, '_timesteps_total': 375304, '_time_total': 19753.840520381927, '_episodes_total': 1079}


agent_timesteps_total: 387288
custom_metrics: {}
date: 2022-03-25_00-47-19
done: false
episode_len_mean: 168.88
episode_media: {}
episode_reward_max: 44.80753976623623
episode_reward_mean: -2.6904307693299376
episode_reward_min: -6.553059547031859
episodes_this_iter: 36
episodes_total: 1079
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 142.34109497070312
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.10776452720165253
        var_gnorm: 45.323486328125
        vf_explained_var: 0.1808253526687622
        vf_loss: 0.00017791078425943851
  learner_queue:
    size_count: 410
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 387288
  num_steps_sampled: 193644
  num_steps_trained: 3

65 blue_0 DOWN
150 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 150 -0.10100182984562173 -1.3051814677327032
blue_1 False False 150 -1.101002963178955 -1.8201601813544128
-------------------------- Scene: 0 --------------------------
73 blue_0 DOWN
77 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 77 -0.1010016646879027 -1.305124186745747
blue_1 False False 77 -1.100996998021236 -1.304900013045789
-------------------------- Scene: 0 --------------------------
84 blue_0 DOWN
117 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 117 -0.10100191992235887 -1.3051340321967613
blue_1 False False 117 -1.1010020865890255 -1.306477591053531
-------------------------- Scene: 0 --------------------------
69 blue_1 DOWN
84 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 84 -1.1010026392349634 -1.3051620386638059
blue_1 False False 84 -0.10100177256829668 -1.3054565047685611
-------------------------- Scene: 0 --------------------------
61 blue_1 DOWN
309 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False Fals

2022-03-25 00:59:09,475	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000031\checkpoint-31
2022-03-25 00:59:09,475	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 31, '_timesteps_total': 388424, '_time_total': 20396.111497163773, '_episodes_total': 1117}


agent_timesteps_total: 399422
custom_metrics: {}
date: 2022-03-25_00-59-09
done: false
episode_len_mean: 163.53
episode_media: {}
episode_reward_max: 5.852036572248097
episode_reward_mean: -3.1924627770333194
episode_reward_min: -6.608699968770037
episodes_this_iter: 38
episodes_total: 1117
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 147.43960571289062
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.03434919938445091
        var_gnorm: 45.36962127685547
        vf_explained_var: 0.4184255003929138
        vf_loss: 0.0002603040193207562
  learner_queue:
    size_count: 430
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 399422
  num_steps_sampled: 199711
  num_steps_trained: 3

116 blue_0 DOWN
154 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 154 -0.10100098890729209 -1.3053558787080648
blue_1 False False 154 -1.1009974555739588 -1.818869258724467
-------------------------- Scene: 0 --------------------------
55 blue_1 DOWN
117 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 117 -1.1010019640680866 -1.3110381627660752
blue_1 False False 117 -0.1010017974014199 -1.4062243256954423
-------------------------- Scene: 0 --------------------------
69 blue_0 DOWN
135 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 135 -0.10100185696620138 -1.4071172495888336
blue_1 False False 135 -1.1010024569662014 -1.3218526083742173
-------------------------- Scene: 0 --------------------------
50 blue_0 DOWN
128 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 128 -0.10100166272615704 -1.3049679401805059
blue_1 False False 128 -1.1010023627261571 -1.3576888342936366
-------------------------- Scene: 0 --------------------------
48 blue_1 DOWN
126 blue_0 DOWN
TIME LIMIT LOSE
blue_0

56 blue_0 DOWN
209 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 209 -0.10100215877700677 -1.3060255530268081
blue_1 False False 209 -1.1010008254436734 -3.452181865207543
-------------------------- Scene: 0 --------------------------
57 blue_1 DOWN
200 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 200 -1.1010017559650231 -3.1000364394553266
blue_1 False False 200 -0.10100192263168983 -1.30510853470242
-------------------------- Scene: 0 --------------------------
45 blue_1 DOWN
59 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 59 -1.1010008573786407 -1.204051132092982
blue_1 False False 59 -0.1010016240453074 -1.3038708865553035
-------------------------- Scene: 0 --------------------------
84 blue_1 DOWN
108 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 108 -1.100997664291079 -1.3061098554979902
blue_1 False False 108 -0.10100186429107898 -1.6070454199119926
-------------------------- Scene: 0 --------------------------


2022-03-25 01:11:30,536	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000032\checkpoint-32
2022-03-25 01:11:30,536	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 32, '_timesteps_total': 401544, '_time_total': 21064.263538599014, '_episodes_total': 1159}


agent_timesteps_total: 411614
custom_metrics: {}
date: 2022-03-25_01-11-30
done: false
episode_len_mean: 154.17
episode_media: {}
episode_reward_max: 5.852036572248097
episode_reward_mean: -3.1514886938145823
episode_reward_min: -6.608699968770037
episodes_this_iter: 42
episodes_total: 1159
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 152.69552612304688
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.999996185302734
        model: {}
        policy_loss: 0.02857125550508499
        var_gnorm: 45.43384552001953
        vf_explained_var: 0.30086904764175415
        vf_loss: 7.934303721413016e-05
  learner_queue:
    size_count: 450
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 411614
  num_steps_sampled: 205807
  num_s

74 blue_0 DOWN
137 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 137 -0.10100166420741882 -1.3052819752445985
blue_1 False False 137 -1.101002297540752 -2.438880309801363
-------------------------- Scene: 0 --------------------------
80 blue_0 DOWN
96 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 96 -0.10100161417498144 -1.4062637840508982
blue_1 False False 96 -1.1009959141749814 -1.4097224201003324
-------------------------- Scene: 0 --------------------------
63 blue_1 DOWN
94 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 94 -1.10099461460829 -1.407052965803114
blue_1 False False 94 -0.10100134794162349 -1.3063380614363516
-------------------------- Scene: 0 --------------------------
49 blue_1 DOWN
95 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 95 -1.1010098446147638 -1.6102333067662262
blue_1 False False 95 -0.10100204461476377 -1.3048511629087425
-------------------------- Scene: 0 --------------------------
45 blue_0 DOWN
59 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 5

90 blue_0 DOWN
137 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 137 -0.10100133931046733 -1.3052038282870837
blue_1 False False 137 -1.101002505977134 -1.350656048436184
-------------------------- Scene: 0 --------------------------
48 blue_1 DOWN
127 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 127 -1.1009909149944195 -2.3229398398681256
blue_1 False False 127 -0.10100101499441946 -1.3042802727384262
-------------------------- Scene: 0 --------------------------
118 blue_1 DOWN
127 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 127 -1.1010015383898992 -2.323154187320301
blue_1 False False 127 -0.10100220505656585 -1.3058650484299883
-------------------------- Scene: 0 --------------------------
43 blue_0 DOWN
47 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 47 -0.10100144576159462 -1.2039729768254241
blue_1 False False 47 -1.1010039457615945 -1.303736156449469
-------------------------- Scene: 0 --------------------------
47 blue_0 DOWN
136 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fals

2022-03-25 01:24:17,773	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000033\checkpoint-33
2022-03-25 01:24:17,773	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 33, '_timesteps_total': 414664, '_time_total': 21761.38661837578, '_episodes_total': 1204}


agent_timesteps_total: 424222
custom_metrics: {}
date: 2022-03-25_01-24-17
done: false
episode_len_mean: 140.46
episode_media: {}
episode_reward_max: -2.409054309920849
episode_reward_mean: -3.20098645478524
episode_reward_min: -6.793844048178446
episodes_this_iter: 45
episodes_total: 1204
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 157.97605895996094
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.00000762939453
        model: {}
        policy_loss: 0.007223861757665873
        var_gnorm: 45.48797607421875
        vf_explained_var: 0.39819836616516113
        vf_loss: 0.00011534616351127625
  learner_queue:
    size_count: 470
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 424222
  num_steps_sampled: 212111
  num_s

75 blue_0 DOWN
155 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 155 -0.10100167850299485 -1.3051405047043265
blue_1 False False 155 -1.1010005785029948 -1.3676453011989285
-------------------------- Scene: 0 --------------------------
103 blue_0 DOWN
113 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 113 -0.10100178928437203 -1.5082152304223464
blue_1 False False 113 -1.1010074559510388 -1.3050648133996305
-------------------------- Scene: 0 --------------------------
131 blue_0 DOWN
207 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 207 -0.10100125518828738 -2.4250266971616394
blue_1 False False 207 -1.101000821854954 -1.3175895517086167
-------------------------- Scene: 0 --------------------------
90 blue_0 DOWN
98 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 98 -0.1010016648397157 -1.305326568552204
blue_1 False False 98 -1.1010019315063824 -1.4071703909256232
-------------------------- Scene: 0 --------------------------
90 blue_1 DOWN
144 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fa

2022-03-25 01:36:02,645	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000034\checkpoint-34
2022-03-25 01:36:02,645	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 34, '_timesteps_total': 427784, '_time_total': 22401.683601617813, '_episodes_total': 1241}


agent_timesteps_total: 436436
custom_metrics: {}
date: 2022-03-25_01-36-02
done: false
episode_len_mean: 151.38
episode_media: {}
episode_reward_max: 133.03764687317897
episode_reward_mean: -1.7784288978603953
episode_reward_min: -6.793844048178446
episodes_this_iter: 37
episodes_total: 1241
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 163.27761840820312
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.07306555658578873
        var_gnorm: 45.54713439941406
        vf_explained_var: 0.3004818260669708
        vf_loss: 0.0003144388902001083
  learner_queue:
    size_count: 490
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 436436
  num_steps_sampled: 218218
  num_steps_trained:

73 blue_1 DOWN
194 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 194 -1.1009980890647444 -1.410025216654116
blue_1 False False 194 -0.10100165573141102 -1.3047677517628402
-------------------------- Scene: 0 --------------------------
62 blue_1 DOWN
63 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 63 -1.1009970990307474 -1.2058185532393157
blue_1 False False 63 -0.113237965697414 -1.3171117719942005
-------------------------- Scene: 0 --------------------------
123 blue_1 DOWN
140 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 140 -1.1010023919147742 -1.9323538328519287
blue_1 False False 140 -0.10100175858144098 -1.3095124649271233
-------------------------- Scene: 0 --------------------------
108 blue_0 DOWN
175 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 175 -0.10100165119072976 -1.305061735552201
blue_1 False False 175 -1.1010019178573964 -3.8376991792112864
-------------------------- Scene: 0 --------------------------
51 blue_1 DOWN
110 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fal

48 blue_0 DOWN
96 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 96 -0.10100060740303117 -1.3039932707235922
blue_1 False False 96 -1.1009908074030312 -1.3055233048543906
-------------------------- Scene: 0 --------------------------
95 blue_0 DOWN
130 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 130 -0.10100207268760189 -1.918230526964932
blue_1 False False 130 -1.1009963060209351 -1.4113120088838373
-------------------------- Scene: 0 --------------------------
46 blue_0 DOWN
46 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 46 -1.1009963971108179 -1.3050196781427301
blue_1 False False 46 -1.1010064304441511 -1.3050555114760636
-------------------------- Scene: 0 --------------------------
72 blue_0 DOWN
76 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 76 -0.10100001655950049 -1.509202735325964
blue_1 False False 76 -1.1010030832261672 -1.2029611268243188
-------------------------- Scene: 0 --------------------------
367 blue_0 DOWN
405 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fal

2022-03-25 01:48:31,727	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000035\checkpoint-35
2022-03-25 01:48:31,742	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 35, '_timesteps_total': 440904, '_time_total': 23078.457185268402, '_episodes_total': 1284}


agent_timesteps_total: 448772
custom_metrics: {}
date: 2022-03-25_01-48-31
done: false
episode_len_mean: 148.32
episode_media: {}
episode_reward_max: 133.03764687317897
episode_reward_mean: -1.6933873109659858
episode_reward_min: -9.001080128414213
episodes_this_iter: 43
episodes_total: 1284
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 167.15643310546875
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.999996185302734
        model: {}
        policy_loss: 4.844099998474121
        var_gnorm: 45.587032318115234
        vf_explained_var: 0.5624222159385681
        vf_loss: 0.09303189814090729
  learner_queue:
    size_count: 510
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 448772
  num_steps_sampled: 224386
  num_step

53 blue_1 DOWN
85 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 85 -1.1010019397728554 -1.3063036734574482
blue_1 False False 85 -0.10100133977285541 -1.303952212999803
-------------------------- Scene: 0 --------------------------
43 blue_1 DOWN
321 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 321 -1.1010032647751824 -6.209814974754913
blue_1 False False 321 -0.10100166477518246 -1.204270810662404
-------------------------- Scene: 0 --------------------------
43 blue_0 DOWN
58 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 58 -0.10100083001444023 -1.2050725554586001
blue_1 False False 58 -1.101013196681107 -1.405969671093295
-------------------------- Scene: 0 --------------------------
43 blue_1 DOWN
78 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 78 -1.1009946494650102 -1.3061229704175699
blue_1 False False 78 -0.10100038279834365 -1.2041757889618647
-------------------------- Scene: 0 --------------------------
88 blue_1 DOWN
120 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 

83 blue_1 DOWN
270 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 270 -1.1009995878156518 -1.9469996576793014
blue_1 False False 270 -0.10100202114898511 -1.3050405814888286
-------------------------- Scene: 0 --------------------------
91 blue_1 DOWN
241 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 241 -1.101002203109761 -3.052501063813386
blue_1 False False 241 -0.10100163644309441 -1.405478500705093
-------------------------- Scene: 0 --------------------------


2022-03-25 02:00:41,059	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000036\checkpoint-36
2022-03-25 02:00:41,059	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 36, '_timesteps_total': 454024, '_time_total': 23733.610256433487, '_episodes_total': 1324}


agent_timesteps_total: 460816
custom_metrics: {}
date: 2022-03-25_02-00-40
done: false
episode_len_mean: 144.73
episode_media: {}
episode_reward_max: -2.408013285754074
episode_reward_mean: -3.21225213121154
episode_reward_min: -9.001080128414213
episodes_this_iter: 40
episodes_total: 1324
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 171.3876495361328
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.39832985401153564
        var_gnorm: 45.609352111816406
        vf_explained_var: 0.021849334239959717
        vf_loss: 0.0006403328152373433
  learner_queue:
    size_count: 530
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 460816
  num_steps_sampled: 230408
  num_steps_trained:

116 blue_0 DOWN
241 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 241 -0.10100155513876287 -1.3061109300719314
blue_1 False False 241 -1.1010049218054296 -1.4262765551161642
-------------------------- Scene: 0 --------------------------
66 blue_1 DOWN
67 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 67 -1.1009969815329925 -1.305056462306748
blue_1 False False 67 -0.11313698153299243 -1.5190953649153567
-------------------------- Scene: 0 --------------------------
57 blue_0 DOWN
115 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 115 -0.10100184702949982 -1.3041305901753935
blue_1 False False 115 -1.1009974136961664 -1.3051647488165101
-------------------------- Scene: 0 --------------------------
81 blue_1 DOWN
127 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 127 -1.1010022728292528 -2.1282405840290064
blue_1 False False 127 -0.10100200616258617 -1.5070608878589347
-------------------------- Scene: 0 --------------------------
75 blue_0 DOWN
202 blue_1 DOWN
TIME LIMIT LOSE
blue_0 F

87 blue_0 DOWN
101 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 101 -0.10100174815032667 -1.30509670203419
blue_1 False False 101 -1.10100538148366 -1.510312198232537
-------------------------- Scene: 0 --------------------------
190 blue_1 DOWN
343 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 343 -1.101001691422909 -4.374886681929191
blue_1 False False 343 -0.10100155808957571 -1.417430815478921
-------------------------- Scene: 0 --------------------------
84 blue_1 DOWN
95 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 95 -1.1010013305776012 -1.204299742872101
blue_1 False False 95 -0.10100166391093457 -1.3063300575766854
-------------------------- Scene: 0 --------------------------
123 blue_1 DOWN
280 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 280 -1.1010053959789976 -2.6714902185103933
blue_1 False False 280 -0.10100166264566417 -1.305240696718954
-------------------------- Scene: 0 --------------------------
125 blue_0 DOWN
133 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fa

2022-03-25 02:13:44,739	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000037\checkpoint-37
2022-03-25 02:13:44,739	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 37, '_timesteps_total': 466360, '_time_total': 24442.87261247635, '_episodes_total': 1369}


agent_timesteps_total: 473598
custom_metrics: {}
date: 2022-03-25_02-13-44
done: false
episode_len_mean: 145.37
episode_media: {}
episode_reward_max: -2.5090753154015166
episode_reward_mean: -3.271124923458404
episode_reward_min: -9.001080128414213
episodes_this_iter: 45
episodes_total: 1369
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 176.34600830078125
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.04108641296625137
        var_gnorm: 45.64118194580078
        vf_explained_var: 0.30043724179267883
        vf_loss: 0.0001442988432245329
  learner_queue:
    size_count: 550
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 473598
  num_steps_sampled: 236799
  num_steps_trained

123 blue_1 DOWN
546 blue_0 Shoot at red_1 launch distance : 52752.986480469976 True True
561 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 561 -1.1049997625889627 3.16927022721576
blue_1 False False 561 -0.10100196258896271 -1.3055309680809704
-------------------------- Scene: 0 --------------------------
90 blue_0 DOWN
92 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 92 -0.1010005240753728 -1.4062812937086449
blue_1 False False 92 -1.101002057408706 -1.313143860953038
-------------------------- Scene: 0 --------------------------
78 blue_1 DOWN
95 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 95 -1.101002256122421 -1.3060513032000143
blue_1 False False 95 -0.10100202278908772 -1.4069503474478462
-------------------------- Scene: 0 --------------------------
46 blue_0 DOWN
67 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 67 -0.10100135571709598 -1.3048623875829604
blue_1 False False 67 -1.100997355717096 -1.30485535786685
-------------------------- Scene: 0 ------------------------

46 blue_0 DOWN
69 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 69 -0.10100086443223058 -1.3049717691340592
blue_1 False False 69 -1.1010160310988972 -1.508384465554339
-------------------------- Scene: 0 --------------------------
123 blue_1 DOWN
150 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 150 -1.1010013635083706 -1.40698897902828
blue_1 False False 150 -0.1010016635083707 -2.0243320178857367
-------------------------- Scene: 0 --------------------------


2022-03-25 02:25:48,657	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000038\checkpoint-38
2022-03-25 02:25:48,657	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 38, '_timesteps_total': 478696, '_time_total': 25093.93829536438, '_episodes_total': 1409}


agent_timesteps_total: 485732
custom_metrics: {}
date: 2022-03-25_02-25-48
done: false
episode_len_mean: 154.19
episode_media: {}
episode_reward_max: 1.863739259134792
episode_reward_mean: -3.138821042710155
episode_reward_min: -6.069661338458774
episodes_this_iter: 40
episodes_total: 1409
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 181.52157592773438
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.9999885559082
        model: {}
        policy_loss: 0.039232850074768066
        var_gnorm: 45.69652557373047
        vf_explained_var: 0.48905447125434875
        vf_loss: 0.00012649959535337985
  learner_queue:
    size_count: 570
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 485732
  num_steps_sampled: 242866
  num_st

45 blue_1 DOWN
191 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 191 -1.1010016086116157 -1.3048913027739513
blue_1 False False 191 -0.10100240861161561 -1.2038343317600937
-------------------------- Scene: 0 --------------------------
61 blue_0 DOWN
128 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 128 -0.10100166330877813 -1.3050592288505896
blue_1 False False 128 -1.100997963308778 -1.508008633933572
-------------------------- Scene: 0 --------------------------
83 blue_1 DOWN
98 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 98 -1.1010078489679331 -1.3053336246614016
blue_1 False False 98 -0.10100058230126642 -1.3049964160434802
-------------------------- Scene: 0 --------------------------
154 blue_1 DOWN
211 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 211 -1.1009982396870035 -2.237450580246569
blue_1 False False 211 -0.10100170635367015 -2.2423507577789317
-------------------------- Scene: 0 --------------------------
81 blue_1 DOWN
95 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fals

2022-03-25 02:37:51,101	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000039\checkpoint-39
2022-03-25 02:37:51,101	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 39, '_timesteps_total': 491032, '_time_total': 25743.066980838776, '_episodes_total': 1446}


agent_timesteps_total: 497990
custom_metrics: {}
date: 2022-03-25_02-37-50
done: false
episode_len_mean: 155.29
episode_media: {}
episode_reward_max: 1.863739259134792
episode_reward_mean: -3.1784683236455753
episode_reward_min: -8.706753932556218
episodes_this_iter: 37
episodes_total: 1446
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 186.66958618164062
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.00000762939453
        model: {}
        policy_loss: -0.007971702143549919
        var_gnorm: 45.75497817993164
        vf_explained_var: 0.5091924667358398
        vf_loss: 0.00017262433539144695
  learner_queue:
    size_count: 590
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 497990
  num_steps_sampled: 248995
  num_

60 blue_0 DOWN
262 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 262 -0.10100122696609033 -1.3038664170624938
blue_1 False False 262 -1.1009982602994237 -3.9733979398753423
-------------------------- Scene: 0 --------------------------
112 blue_1 DOWN
245 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 245 -1.1010045449420969 -1.4200156350650723
blue_1 False False 245 -0.10100224494209681 -1.4656707325944867
-------------------------- Scene: 0 --------------------------
50 blue_1 DOWN
68 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 68 -1.101011732511173 -1.3048353812954006
blue_1 False False 68 -0.10100049917783968 -1.304994714915077
-------------------------- Scene: 0 --------------------------
66 blue_1 DOWN
118 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 118 -1.101000886181674 -1.3063176728469426
blue_1 False False 118 -0.10100165284834067 -1.2040999384925708
-------------------------- Scene: 0 --------------------------
86 blue_0 DOWN
126 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fal

2022-03-25 02:49:46,211	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000040\checkpoint-40
2022-03-25 02:49:46,211	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 40, '_timesteps_total': 503368, '_time_total': 26386.09003305435, '_episodes_total': 1484}


agent_timesteps_total: 510098
custom_metrics: {}
date: 2022-03-25_02-49-45
done: false
episode_len_mean: 160.84
episode_media: {}
episode_reward_max: 1.863739259134792
episode_reward_mean: -3.2374192030540003
episode_reward_min: -8.706753932556218
episodes_this_iter: 38
episodes_total: 1484
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 191.97386169433594
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.04727393761277199
        var_gnorm: 45.81053161621094
        vf_explained_var: 0.8496168851852417
        vf_loss: 0.00018259296484757215
  learner_queue:
    size_count: 610
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 510098
  num_steps_sampled: 255049
  num_steps_trained:

182 blue_0 DOWN
302 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 302 -0.10100231594533329 -2.2209299646872056
blue_1 False False 302 -1.1010056492786666 -2.1792787442986525
-------------------------- Scene: 0 --------------------------
84 blue_0 DOWN
204 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 204 -0.10100152682929928 -1.4062599267250113
blue_1 False False 204 -1.1010049601626326 -2.537833670996272
-------------------------- Scene: 0 --------------------------
160 blue_1 DOWN
497 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 497 -1.101001264256042 -4.944283708258141
blue_1 False False 497 -0.10100166425604215 -3.2411067249927226
-------------------------- Scene: 0 --------------------------
53 blue_0 DOWN
70 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 70 -0.10100182318588127 -1.3060665062943393
blue_1 False False 70 -1.1010052565192145 -1.3041428469271752
-------------------------- Scene: 0 --------------------------
75 blue_1 DOWN
204 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fa

44 blue_1 DOWN
70 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 70 -1.1010018730780804 -1.3051305954250196
blue_1 False False 70 -0.10100173974474701 -1.3039484361409794
-------------------------- Scene: 0 --------------------------
110 blue_0 DOWN
130 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 130 -0.1010017474627772 -1.306035219355383
blue_1 False False 130 -1.1010062474627773 -1.5480985057971401
-------------------------- Scene: 0 --------------------------


2022-03-25 03:02:12,419	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000041\checkpoint-41
2022-03-25 03:02:12,419	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 41, '_timesteps_total': 515704, '_time_total': 27066.762346982956, '_episodes_total': 1524}


agent_timesteps_total: 522914
custom_metrics: {}
date: 2022-03-25_03-02-12
done: false
episode_len_mean: 161.43
episode_media: {}
episode_reward_max: 1.0417704483914016
episode_reward_mean: -3.2663469396850866
episode_reward_min: -8.706753932556218
episodes_this_iter: 40
episodes_total: 1524
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 197.28257751464844
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: -0.008615098893642426
        var_gnorm: 45.86570739746094
        vf_explained_var: 0.5514011383056641
        vf_loss: 8.112745126709342e-05
  learner_queue:
    size_count: 630
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 522914
  num_steps_sampled: 261457
  num_

76 blue_0 DOWN
228 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 228 -0.1010008571320327 -1.406119950657572
blue_1 False False 228 -1.1009976237986994 -1.3403260593867878
-------------------------- Scene: 0 --------------------------
181 blue_0 DOWN
283 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 283 -0.10100181734312494 -2.530139794231167
blue_1 False False 283 -1.1009969506764583 -3.7269784037662372
-------------------------- Scene: 0 --------------------------
89 blue_0 DOWN
130 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 130 -0.10100113885311666 -1.3039266900429105
blue_1 False False 130 -1.1010016388531167 -1.4064702887894245
-------------------------- Scene: 0 --------------------------
43 blue_1 DOWN
63 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 63 -1.1009952981824696 -1.3050335107093132
blue_1 False False 63 -0.10100166484913617 -1.203614373290119
-------------------------- Scene: 0 --------------------------
45 blue_1 DOWN
328 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fals

2022-03-25 03:14:33,263	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000042\checkpoint-42
2022-03-25 03:14:33,263	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 42, '_timesteps_total': 528040, '_time_total': 27737.95699238777, '_episodes_total': 1561}


agent_timesteps_total: 535916
custom_metrics: {}
date: 2022-03-25_03-14-33
done: false
episode_len_mean: 161.11
episode_media: {}
episode_reward_max: 1.0417704483914016
episode_reward_mean: -3.2264267554412713
episode_reward_min: -8.18539043325086
episodes_this_iter: 37
episodes_total: 1561
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 202.60154724121094
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.02255999483168125
        var_gnorm: 45.916419982910156
        vf_explained_var: 0.46496233344078064
        vf_loss: 0.00010618431406328455
  learner_queue:
    size_count: 650
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 535916
  num_steps_sampled: 267958
  num_steps_traine

121 blue_1 DOWN
129 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 129 -1.1009958809569818 -2.2289647897400306
blue_1 False False 129 -0.10100178095698183 -1.306152462521123
-------------------------- Scene: 0 --------------------------
44 blue_0 DOWN
55 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 55 -0.10100093198307077 -1.3049874344722823
blue_1 False False 55 -1.1009887986497375 -1.5071519646977616
-------------------------- Scene: 0 --------------------------
95 blue_0 DOWN
139 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 139 -0.10100170645140458 -1.3053702035817065
blue_1 False False 139 -1.1009971064514046 -2.170573367202657
-------------------------- Scene: 0 --------------------------
52 blue_1 DOWN
59 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 59 -1.1010084897327255 -1.4060581889580823
blue_1 False False 59 -0.10100185639939208 -1.3047955657001378
-------------------------- Scene: 0 --------------------------
58 blue_0 DOWN
76 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False 

98 blue_1 DOWN
113 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 113 -1.1010022415607286 -2.3300203290442654
blue_1 False False 113 -0.10100007489406183 -1.4054127970478525
-------------------------- Scene: 0 --------------------------
50 blue_0 DOWN
89 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 89 -0.1010018889550055 -1.2040230202363824
blue_1 False False 89 -1.1010021889550055 -1.3060324663243248
-------------------------- Scene: 0 --------------------------
97 blue_1 DOWN
99 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 99 -1.1010086712321467 -1.508019469947707
blue_1 False False 99 -0.10100247123214684 -1.816641667896518
-------------------------- Scene: 0 --------------------------
102 blue_1 DOWN
174 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 174 -1.1009873662101801 -1.204049671704091
blue_1 False False 174 -0.10100033287684672 -1.3049478729786024
-------------------------- Scene: 0 --------------------------
62 blue_0 DOWN
75 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fa

2022-03-25 03:27:22,589	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000043\checkpoint-43
2022-03-25 03:27:22,589	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 43, '_timesteps_total': 540376, '_time_total': 28435.387996196747, '_episodes_total': 1607}


agent_timesteps_total: 548514
custom_metrics: {}
date: 2022-03-25_03-27-22
done: false
episode_len_mean: 158.0
episode_media: {}
episode_reward_max: 1.0417704483914016
episode_reward_mean: -3.119663902493965
episode_reward_min: -7.375337137990819
episodes_this_iter: 46
episodes_total: 1607
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 207.90281677246094
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.00000762939453
        model: {}
        policy_loss: -0.038280993700027466
        var_gnorm: 45.96669006347656
        vf_explained_var: 0.8055529594421387
        vf_loss: 0.000244556664256379
  learner_queue:
    size_count: 670
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 548514
  num_steps_sampled: 274257
  num_ste

108 blue_0 DOWN
136 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 136 -0.1010006654701765 -1.2042756968270023
blue_1 False False 136 -1.1010001321368432 -1.3056092367304128
-------------------------- Scene: 0 --------------------------
108 blue_0 DOWN
132 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 132 -0.10100160058411564 -1.305429788336587
blue_1 False False 132 -1.101004133917449 -1.4285891980401766
-------------------------- Scene: 0 --------------------------
71 blue_1 DOWN
146 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 146 -1.1009873813439506 -1.406536447280107
blue_1 False False 146 -0.10100094801061733 -1.4050589003524692
-------------------------- Scene: 0 --------------------------
117 blue_1 DOWN
184 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 184 -1.100999524577811 -2.5440919118589367
blue_1 False False 184 -0.1010000579111444 -1.3171836355841386
-------------------------- Scene: 0 --------------------------
45 blue_1 DOWN
114 blue_0 DOWN
TIME LIMIT LOSE
blue_0 F

49 blue_0 DOWN
150 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 150 -0.1010008314470107 -1.3049745853918322
blue_1 False False 150 -1.1010165314470106 -1.508005543823435
-------------------------- Scene: 0 --------------------------
121 blue_1 DOWN
140 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 140 -1.1009996475231785 -1.3052251213915507
blue_1 False False 140 -0.10100208085651172 -1.305255448103576
-------------------------- Scene: 0 --------------------------
51 blue_0 DOWN
73 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 73 -0.1010019062619377 -1.4060402522359339
blue_1 False False 73 -1.1010050729286043 -1.4070705766701077
-------------------------- Scene: 0 --------------------------
43 blue_1 DOWN
92 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 92 -1.101004098343141 -1.3060474422406512
blue_1 False False 92 -0.10100166500980766 -1.2038780298731042
-------------------------- Scene: 0 --------------------------
118 blue_1 DOWN
200 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False F

2022-03-25 03:39:58,571	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000044\checkpoint-44
2022-03-25 03:39:58,571	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 44, '_timesteps_total': 552712, '_time_total': 29125.36649632454, '_episodes_total': 1652}


agent_timesteps_total: 560790
custom_metrics: {}
date: 2022-03-25_03-39-58
done: false
episode_len_mean: 140.91
episode_media: {}
episode_reward_max: -2.435201663630732
episode_reward_mean: -3.0737429462293266
episode_reward_min: -7.375337137990819
episodes_this_iter: 45
episodes_total: 1652
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 213.0926513671875
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: -0.06759587675333023
        var_gnorm: 46.02203369140625
        vf_explained_var: 0.26528894901275635
        vf_loss: 0.0002824380062520504
  learner_queue:
    size_count: 690
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 560790
  num_steps_sampled: 280395
  num_s

45 blue_0 DOWN
158 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 158 -0.10100114564602561 -1.203925118674759
blue_1 False False 158 -1.1010020456460257 -1.2038461312981648
-------------------------- Scene: 0 --------------------------
82 blue_1 DOWN
86 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 86 -1.10099768066019 -1.3051473911895946
blue_1 False False 86 -0.10100184732685659 -1.4068327675719217
-------------------------- Scene: 0 --------------------------
68 blue_1 DOWN
254 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 254 -1.1010047183888516 -1.4104645019615796
blue_1 False False 254 -0.10100225172218492 -1.3056091840914141
-------------------------- Scene: 0 --------------------------
49 blue_1 DOWN
617 blue_0 Shoot at red_1 launch distance : 59654.39412824877 True True
629 blue_0 Shoot at red_1 launch distance : 54697.558986775446 True True
667 red_0 Shoot at blue_0
681 red_0 Shoot at blue_0
685 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 685 -0.20699372311752662 33.4058

76 blue_0 DOWN
521 blue_1 Shoot at red_0 launch distance : 57097.96321069207 True True
547 blue_1 Shoot at red_0 launch distance : 49052.612000558045 True True
620 red_1 Shoot at blue_1
632 red_1 Shoot at blue_1
663 blue_1 Splash :red_0
970 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 970 -0.10100004463345962 -1.3051576669290006
blue_1 False False 970 -1.1070032113001262 101.93623049136087
-------------------------- Scene: 0 --------------------------


2022-03-25 03:52:38,016	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000045\checkpoint-45
2022-03-25 03:52:38,016	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 45, '_timesteps_total': 565048, '_time_total': 29815.863967895508, '_episodes_total': 1691}


agent_timesteps_total: 574006
custom_metrics: {}
date: 2022-03-25_03-52-37
done: false
episode_len_mean: 149.97
episode_media: {}
episode_reward_max: 100.63107282443185
episode_reward_mean: -1.714025811809401
episode_reward_min: -7.375337137990819
episodes_this_iter: 39
episodes_total: 1691
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 218.38479614257812
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.02128610759973526
        var_gnorm: 46.07902145385742
        vf_explained_var: 0.4605637192726135
        vf_loss: 0.00019727999460883439
  learner_queue:
    size_count: 710
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 574006
  num_steps_sampled: 287003
  num_steps_trained: 

45 blue_1 DOWN
98 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 98 -1.101008264487052 -1.5082056594524744
blue_1 False False 98 -0.10100153115371868 -1.3051981188726025
-------------------------- Scene: 0 --------------------------
62 blue_1 DOWN
109 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 109 -1.1010022991527502 -1.3048631383191869
blue_1 False False 109 -0.10100063248608346 -1.4048509086819505
-------------------------- Scene: 0 --------------------------
50 blue_0 DOWN
93 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 93 -0.10100116389832389 -1.20399234702325
blue_1 False False 93 -1.1009953638983239 -1.3040949282698338
-------------------------- Scene: 0 --------------------------
46 blue_1 DOWN
93 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 93 -1.1010017822864322 -1.305267690369647
blue_1 False False 93 -0.10100101561976554 -1.2040843467702325
-------------------------- Scene: 0 --------------------------
53 blue_0 DOWN
129 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False

2022-03-25 04:04:27,085	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000046\checkpoint-46
2022-03-25 04:04:27,085	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 46, '_timesteps_total': 577778, '_time_total': 30453.53743815422, '_episodes_total': 1725}


agent_timesteps_total: 586386
custom_metrics: {}
date: 2022-03-25_04-04-26
done: false
episode_len_mean: 163.53
episode_media: {}
episode_reward_max: 100.63107282443185
episode_reward_mean: -1.894380298943109
episode_reward_min: -7.556005016872453
episodes_this_iter: 34
episodes_total: 1725
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 223.12417602539062
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.999996185302734
        model: {}
        policy_loss: 0.9263907670974731
        var_gnorm: 46.126678466796875
        vf_explained_var: 0.6751296520233154
        vf_loss: 0.004811153281480074
  learner_queue:
    size_count: 730
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 586386
  num_steps_sampled: 293193
  num_ste

61 blue_0 DOWN
125 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 125 -0.10100143221154577 -1.4060576316076425
blue_1 False False 125 -1.1010063655448792 -1.4062800840391798
-------------------------- Scene: 0 --------------------------
50 blue_1 DOWN
129 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 129 -1.1009997977667083 -1.204155476660619
blue_1 False False 129 -0.10100169776670823 -1.2039571241285305
-------------------------- Scene: 0 --------------------------
46 blue_0 DOWN
87 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 87 -0.10100157138746657 -1.507967964863276
blue_1 False False 87 -1.1010025713874665 -2.1188090941790056
-------------------------- Scene: 0 --------------------------
124 blue_0 DOWN
124 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 124 -1.1010012898590404 -1.4229254795853854
blue_1 False False 124 -1.1010014898590403 -1.305750779585385
-------------------------- Scene: 0 --------------------------
43 blue_1 DOWN
138 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fals

77 blue_0 DOWN
95 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 95 -0.10100001662822043 -1.2041896216207706
blue_1 False False 95 -1.1009957832948871 -1.2037936284963242
-------------------------- Scene: 0 --------------------------
106 blue_0 DOWN
197 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 197 -0.10100166093959821 -1.3063338461135068
blue_1 False False 197 -1.1010021609395981 -4.174151677403834
-------------------------- Scene: 0 --------------------------


2022-03-25 04:16:29,446	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000047\checkpoint-47
2022-03-25 04:16:29,446	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 47, '_timesteps_total': 590508, '_time_total': 31109.850129127502, '_episodes_total': 1765}


agent_timesteps_total: 598438
custom_metrics: {}
date: 2022-03-25_04-16-29
done: false
episode_len_mean: 168.76
episode_media: {}
episode_reward_max: 100.63107282443185
episode_reward_mean: -1.9603013719731424
episode_reward_min: -7.556005016872453
episodes_this_iter: 40
episodes_total: 1765
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 228.16372680664062
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.18068937957286835
        var_gnorm: 46.166954040527344
        vf_explained_var: 0.15217214822769165
        vf_loss: 0.0003023309982381761
  learner_queue:
    size_count: 750
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 598438
  num_steps_sampled: 299219
  num_steps_traine

234 blue_1 DOWN
236 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 236 -1.1010036707899316 -5.2976386013198935
blue_1 False False 236 -0.10100160412326485 -3.9867678481983786
-------------------------- Scene: 0 --------------------------
68 blue_0 DOWN
95 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 95 -0.10100044905886717 -1.304014074875446
blue_1 False False 95 -1.1010127823922005 -1.3050234336404976
-------------------------- Scene: 0 --------------------------
143 blue_1 DOWN
206 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 206 -1.1010014734782532 -1.5547407600463157
blue_1 False False 206 -0.10100117347825312 -1.4062265404583882
-------------------------- Scene: 0 --------------------------
53 blue_0 DOWN
77 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 77 -0.10100222238857083 -1.3050136829811816
blue_1 False False 77 -1.1009957557219041 -1.4060432909784764
-------------------------- Scene: 0 --------------------------
78 blue_0 DOWN
184 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fal

2022-03-25 04:28:01,192	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000048\checkpoint-48
2022-03-25 04:28:01,192	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 48, '_timesteps_total': 603238, '_time_total': 31731.223196983337, '_episodes_total': 1797}


agent_timesteps_total: 610756
custom_metrics: {}
date: 2022-03-25_04-28-00
done: false
episode_len_mean: 174.25
episode_media: {}
episode_reward_max: 1.460454869618217
episode_reward_mean: -3.4638613693100275
episode_reward_min: -9.284406449518265
episodes_this_iter: 32
episodes_total: 1797
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 233.21255493164062
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: -0.08692000806331635
        var_gnorm: 46.21284484863281
        vf_explained_var: 0.416268914937973
        vf_loss: 0.00014953891513869166
  learner_queue:
    size_count: 770
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 610756
  num_steps_sampled: 305378
  num_s

64 blue_1 DOWN
225 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 225 -1.1009945477812382 -2.881956777402483
blue_1 False False 225 -0.10100128111457159 -1.3047612737581835
-------------------------- Scene: 0 --------------------------
66 blue_0 DOWN
267 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 267 -0.10100161475206078 -1.3051084994567492
blue_1 False False 267 -1.1010017480853942 -3.513871411044714
-------------------------- Scene: 0 --------------------------
73 blue_1 DOWN
86 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 86 -1.1009985634841402 -1.3052025153654423
blue_1 False False 86 -0.10100166348414026 -1.3053104658644847
-------------------------- Scene: 0 --------------------------
54 blue_1 DOWN
118 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 118 -1.1010055796182434 -1.4069962753266783
blue_1 False False 118 -0.10100201295157667 -1.2042169444150073
-------------------------- Scene: 0 --------------------------
75 blue_1 DOWN
116 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fal

2022-03-25 04:40:11,056	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000049\checkpoint-49
2022-03-25 04:40:11,056	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 49, '_timesteps_total': 615968, '_time_total': 32385.97117114067, '_episodes_total': 1828}


agent_timesteps_total: 624024
custom_metrics: {}
date: 2022-03-25_04-40-10
done: false
episode_len_mean: 183.2
episode_media: {}
episode_reward_max: 101.22266294627437
episode_reward_mean: -2.1934648320929453
episode_reward_min: -9.284406449518265
episodes_this_iter: 31
episodes_total: 1828
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 238.71578979492188
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.009628679603338242
        var_gnorm: 46.26668930053711
        vf_explained_var: 0.607125997543335
        vf_loss: 0.0001028935585054569
  learner_queue:
    size_count: 790
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 624024
  num_steps_sampled: 312012
  num_steps_trained: 6

595 blue_0 Splash :red_1
643 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 643 -1.1070066840339516 107.04618053094623
blue_1 False False 643 -0.10100095070061832 -1.4062704548005933
-------------------------- Scene: 0 --------------------------
103 blue_0 DOWN
173 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 173 -0.10100084054229987 -1.4063026986527052
blue_1 False False 173 -1.1009888738756333 -1.3052454069975796
-------------------------- Scene: 0 --------------------------
71 blue_1 DOWN
82 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 82 -1.1010033647300115 -1.3062437861111682
blue_1 False False 82 -0.10100166473001139 -1.3052541385996972
-------------------------- Scene: 0 --------------------------
43 blue_1 DOWN
95 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 95 -1.1010002302740698 -1.3062668294339639
blue_1 False False 95 -0.10100246360740318 -1.3053080060967606
-------------------------- Scene: 0 --------------------------
68 blue_0 DOWN
100 blue_1 DOWN
TIME LIMIT LOSE
b

2022-03-25 04:51:58,216	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000050\checkpoint-50
2022-03-25 04:51:58,216	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 50, '_timesteps_total': 628698, '_time_total': 33024.50432801247, '_episodes_total': 1864}


agent_timesteps_total: 636160
custom_metrics: {}
date: 2022-03-25_04-51-57
done: false
episode_len_mean: 190.58
episode_media: {}
episode_reward_max: 105.63991007614563
episode_reward_mean: -1.02305016403671
episode_reward_min: -9.284406449518265
episodes_this_iter: 36
episodes_total: 1864
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 242.59890747070312
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.00000762939453
        model: {}
        policy_loss: 7.990701675415039
        var_gnorm: 46.300697326660156
        vf_explained_var: 0.5839887261390686
        vf_loss: 0.13455486297607422
  learner_queue:
    size_count: 810
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 636160
  num_steps_sampled: 318080
  num_steps_t

64 blue_1 DOWN
116 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 116 -1.100998656603311 -1.305319608546637
blue_1 False False 116 -0.10100148993664435 -1.5071678027881057
-------------------------- Scene: 0 --------------------------
88 blue_1 DOWN
102 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 102 -1.1010059959144762 -1.3046622413739013
blue_1 False False 102 -0.10100166258114296 -1.4064162715453703
-------------------------- Scene: 0 --------------------------
171 blue_1 DOWN
270 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 270 -1.101004728642861 -2.6578741699217203
blue_1 False False 270 -0.10100092864286112 -2.758145726671466
-------------------------- Scene: 0 --------------------------
46 blue_0 DOWN
55 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 55 -0.1010006559535267 -1.3050584484291314
blue_1 False False 55 -1.10100628928686 -1.3052745941076425
-------------------------- Scene: 0 --------------------------
59 blue_1 DOWN
195 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False F

129 blue_1 DOWN
166 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 166 -1.1010009895288801 -1.3054539330707424
blue_1 False False 166 -0.10100182286221356 -1.3047973590852497
-------------------------- Scene: 0 --------------------------
65 blue_1 DOWN
80 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 80 -1.1009966653305352 -1.4071419005440906
blue_1 False False 80 -0.10100089866386854 -1.4049348845067389
-------------------------- Scene: 0 --------------------------
49 blue_0 DOWN
111 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 111 -0.10100212854898687 -1.3039592854972268
blue_1 False False 111 -1.1009997285489868 -1.305814850440869
-------------------------- Scene: 0 --------------------------
79 blue_1 DOWN
193 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 193 -1.1010010691505911 -2.2234644293904475
blue_1 False False 193 -0.10100156915059108 -1.3047953977086895
-------------------------- Scene: 0 --------------------------


2022-03-25 05:04:36,918	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000051\checkpoint-51
2022-03-25 05:04:36,918	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 51, '_timesteps_total': 641428, '_time_total': 33710.540707826614, '_episodes_total': 1906}


agent_timesteps_total: 648796
custom_metrics: {}
date: 2022-03-25_05-04-36
done: false
episode_len_mean: 167.96
episode_media: {}
episode_reward_max: 105.63991007614563
episode_reward_mean: -1.0335803426702725
episode_reward_min: -7.437428015628234
episodes_this_iter: 42
episodes_total: 1906
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 246.66067504882812
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: 4.892644882202148
        var_gnorm: 46.33743667602539
        vf_explained_var: 0.710955798625946
        vf_loss: 0.073309987783432
  learner_queue:
    size_count: 830
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 648796
  num_steps_sampled: 324398
  num_steps_tr

48 blue_0 DOWN
96 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 96 -0.10100226827879312 -1.3050185778876358
blue_1 False False 96 -1.1010012349454599 -1.3050667993945202
-------------------------- Scene: 0 --------------------------
92 blue_0 DOWN
104 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 104 -0.10100165883768075 -1.3049894702645395
blue_1 False False 104 -1.1010038255043475 -1.3046352712966378
-------------------------- Scene: 0 --------------------------
290 blue_0 DOWN
306 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 306 -0.10100008274320404 -4.091595005079631
blue_1 False False 306 -1.1009902494098707 -3.6885399639129552
-------------------------- Scene: 0 --------------------------
146 blue_0 DOWN
156 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 156 -0.10100138723995779 -2.4322704768403267
blue_1 False False 156 -1.1010002872399578 -2.7531599262889808
-------------------------- Scene: 0 --------------------------
73 blue_1 DOWN
142 blue_0 DOWN
TIME LIMIT LOSE
blue_0 

78 blue_1 DOWN
181 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 181 -1.1009976717338554 -1.4757197345046873
blue_1 False False 181 -0.10100207173385536 -1.2030675326942688
-------------------------- Scene: 0 --------------------------
59 blue_1 DOWN
207 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 207 -1.1010010765726252 -4.178542879293939
blue_1 False False 207 -0.10100100990595863 -1.307179534733448
-------------------------- Scene: 0 --------------------------


2022-03-25 05:16:47,143	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000052\checkpoint-52
2022-03-25 05:16:47,143	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 52, '_timesteps_total': 654158, '_time_total': 34368.13260769844, '_episodes_total': 1946}


agent_timesteps_total: 660836
custom_metrics: {}
date: 2022-03-25_05-16-46
done: false
episode_len_mean: 153.27
episode_media: {}
episode_reward_max: -0.5645845775732591
episode_reward_mean: -3.2182489040658115
episode_reward_min: -7.780134968992583
episodes_this_iter: 40
episodes_total: 1946
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 251.6292724609375
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: -0.4975869655609131
        var_gnorm: 46.38175964355469
        vf_explained_var: 0.17003369331359863
        vf_loss: 0.0008211205713450909
  learner_queue:
    size_count: 850
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 660836
  num_steps_sampled: 330418
  num_s

47 blue_0 DOWN
261 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 261 -0.10100106949776369 -1.3039984135585323
blue_1 False False 261 -1.1010012694977638 -2.8744389108899835
-------------------------- Scene: 0 --------------------------
76 blue_1 DOWN
235 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 235 -1.1009992136757065 -3.2611131966232785
blue_1 False False 235 -0.10100234700903978 -1.3053726249900712
-------------------------- Scene: 0 --------------------------
80 blue_1 DOWN
96 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 96 -1.1010131747271628 -1.408386771165132
blue_1 False False 96 -0.1010002080604961 -1.3049941612805982
-------------------------- Scene: 0 --------------------------
80 blue_0 DOWN
126 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 126 -0.10100170923764244 -1.2051431995994075
blue_1 False False 126 -1.1010039092376425 -2.0190394111286563
-------------------------- Scene: 0 --------------------------
66 blue_0 DOWN
87 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fals

91 blue_0 DOWN
202 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 202 -0.10100229549569348 -1.3051994276825167
blue_1 False False 202 -1.1010021954956934 -2.554736012980957
-------------------------- Scene: 0 --------------------------
88 blue_1 DOWN
116 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 116 -1.1009979737389997 -1.3063151298996447
blue_1 False False 116 -0.10100114040566643 -1.4054970690918345
-------------------------- Scene: 0 --------------------------
41 blue_0 DOWN
57 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 57 -0.10100158965067257 -1.203984457497765
blue_1 False False 57 -1.101005322984006 -1.3050755666912006
-------------------------- Scene: 0 --------------------------
55 blue_1 DOWN
98 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 98 -1.101009554093468 -1.305379182151183
blue_1 False False 98 -0.10100295409346795 -1.7112502204038451
-------------------------- Scene: 0 --------------------------
77 blue_1 DOWN
119 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False Fal

2022-03-25 05:29:30,798	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000053\checkpoint-53
2022-03-25 05:29:30,798	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 53, '_timesteps_total': 666888, '_time_total': 35056.92982339859, '_episodes_total': 1990}


agent_timesteps_total: 673264
custom_metrics: {}
date: 2022-03-25_05-29-30
done: false
episode_len_mean: 146.77
episode_media: {}
episode_reward_max: 2.468440709561058
episode_reward_mean: -3.2156808260592156
episode_reward_min: -7.780134968992583
episodes_this_iter: 44
episodes_total: 1990
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 256.6075439453125
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.18063193559646606
        var_gnorm: 46.42292022705078
        vf_explained_var: 0.22537562251091003
        vf_loss: 0.00011724219802999869
  learner_queue:
    size_count: 870
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 673264
  num_steps_sampled: 336632
  num_steps_trained:

101 blue_0 DOWN
109 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 109 -0.10100116386173434 -1.3042424703006699
blue_1 False False 109 -1.100996130528401 -1.3062251155352986
-------------------------- Scene: 0 --------------------------
99 blue_0 DOWN
109 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 109 -0.10100143746418072 -1.305241374068236
blue_1 False False 109 -1.1009987041308473 -1.4064414439177895
-------------------------- Scene: 0 --------------------------
41 blue_0 DOWN
130 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 130 -0.1010014731148782 -1.1030000807894176
blue_1 False False 130 -1.1009966731148781 -1.3041884048924834
-------------------------- Scene: 0 --------------------------
64 blue_1 DOWN
80 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 80 -1.1009964735634012 -1.3052278096042893
blue_1 False False 80 -0.10100130689673462 -1.3046333139353534
-------------------------- Scene: 0 --------------------------
56 blue_1 DOWN
85 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fals

2022-03-25 05:41:29,860	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000054\checkpoint-54
2022-03-25 05:41:29,860	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 54, '_timesteps_total': 679618, '_time_total': 35699.8863389492, '_episodes_total': 2023}


agent_timesteps_total: 685986
custom_metrics: {}
date: 2022-03-25_05-41-29
done: false
episode_len_mean: 164.25
episode_media: {}
episode_reward_max: 2.468440709561058
episode_reward_mean: -3.345703270545793
episode_reward_min: -9.410748603090457
episodes_this_iter: 33
episodes_total: 2023
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 261.6517639160156
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.00000762939453
        model: {}
        policy_loss: -0.06178572401404381
        var_gnorm: 46.47010040283203
        vf_explained_var: 0.5464940071105957
        vf_loss: 9.259129001293331e-05
  learner_queue:
    size_count: 890
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 685986
  num_steps_sampled: 342993
  num_step

68 blue_0 DOWN
77 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 77 -0.10100247561092333 -1.3050613072984796
blue_1 False False 77 -1.1009966756109233 -1.5070562358177397
-------------------------- Scene: 0 --------------------------
51 blue_1 DOWN
99 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 99 -1.1009976966805524 -1.3050747845411022
blue_1 False False 99 -0.10100153001388561 -1.305314368121258
-------------------------- Scene: 0 --------------------------
68 blue_0 DOWN
79 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 79 -0.10100242790410695 -1.3050442443313253
blue_1 False False 79 -1.1010017945707735 -1.3038992467801698
-------------------------- Scene: 0 --------------------------
103 blue_1 DOWN
120 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 120 -1.1010003135631967 -1.3124518635205444
blue_1 False False 120 -0.10100161356319662 -1.3051826717862782
-------------------------- Scene: 0 --------------------------
84 blue_1 DOWN
140 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False F

2022-03-25 05:53:28,833	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000055\checkpoint-55
2022-03-25 05:53:28,833	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 55, '_timesteps_total': 692348, '_time_total': 36345.54913163185, '_episodes_total': 2060}


agent_timesteps_total: 698186
custom_metrics: {}
date: 2022-03-25_05-53-28
done: false
episode_len_mean: 170.9
episode_media: {}
episode_reward_max: 78.43733081158057
episode_reward_mean: -2.5209826015379067
episode_reward_min: -9.410748603090457
episodes_this_iter: 37
episodes_total: 2060
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 267.0963134765625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 0.015336848795413971
        var_gnorm: 46.52634048461914
        vf_explained_var: 0.4635011553764343
        vf_loss: 0.00021295982878655195
  learner_queue:
    size_count: 910
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 698186
  num_steps_sampled: 349093
  num_steps_trained: 6

109 blue_1 DOWN
288 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 288 -1.101004155905283 -5.4010827882393695
blue_1 False False 288 -0.10100172257194957 -1.3048392786676912
-------------------------- Scene: 0 --------------------------
87 blue_1 DOWN
128 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 128 -1.1010086389878788 -1.4070462375990915
blue_1 False False 128 -0.10100160565454538 -1.4061357355906736
-------------------------- Scene: 0 --------------------------
273 blue_0 DOWN
533 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 533 -0.10100192956088577 -1.9494888515397235
blue_1 False False 533 -1.1050024628942192 -1.5460078160569388
-------------------------- Scene: 0 --------------------------
75 blue_0 DOWN
285 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 285 -0.10100163889449187 -1.3051956082955687
blue_1 False False 285 -1.1009966388944918 -2.5690705471543684
-------------------------- Scene: 0 --------------------------
74 blue_1 DOWN
158 blue_0 DOWN
TIME LIMIT LOSE
blue

2022-03-25 06:05:12,349	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000056\checkpoint-56
2022-03-25 06:05:12,349	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 56, '_timesteps_total': 704874, '_time_total': 36975.6129014492, '_episodes_total': 2091}


agent_timesteps_total: 710876
custom_metrics: {}
date: 2022-03-25_06-05-12
done: false
episode_len_mean: 187.08
episode_media: {}
episode_reward_max: 78.43733081158057
episode_reward_mean: -2.1796500228000304
episode_reward_min: -9.410748603090457
episodes_this_iter: 31
episodes_total: 2091
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 271.30523681640625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 1.6532678604125977
        var_gnorm: 46.57059860229492
        vf_explained_var: 0.8573613166809082
        vf_loss: 0.009080159477889538
  learner_queue:
    size_count: 930
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 710876
  num_steps_sampled: 355438
  num_steps_trained: 704

74 blue_0 DOWN
158 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 158 -0.10100051952072266 -1.5072018402165221
blue_1 False False 158 -1.1009931861873894 -2.267659660310402
-------------------------- Scene: 0 --------------------------
74 blue_1 DOWN
92 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 92 -1.1010059988306273 -1.3051225466164191
blue_1 False False 92 -0.10100129883062728 -1.4060928144167344
-------------------------- Scene: 0 --------------------------
50 blue_0 DOWN
303 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 303 -0.1010013149803978 -1.3050421397000371
blue_1 False False 303 -1.1010022149803977 -4.177271104304372
-------------------------- Scene: 0 --------------------------
82 blue_1 DOWN
135 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 135 -1.1010105800245251 -1.3063254705455905
blue_1 False False 135 -0.1010022800245251 -1.2042690229193478
-------------------------- Scene: 0 --------------------------
42 blue_1 DOWN
126 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False

50 blue_1 DOWN
64 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 64 -1.100998455765939 -1.305113517045981
blue_1 False False 64 -0.10100202243260552 -1.2042877758460033
-------------------------- Scene: 0 --------------------------
62 blue_0 DOWN
62 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 62 -1.101000981328576 -1.3048180427046527
blue_1 False False 62 -1.1009974479952425 -1.3050733093713194
-------------------------- Scene: 0 --------------------------
48 blue_1 DOWN
57 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 57 -1.101003915366173 -1.304003372132103
blue_1 False False 57 -0.10100161536617311 -1.3045976193805202
-------------------------- Scene: 0 --------------------------


2022-03-25 06:17:28,921	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000057\checkpoint-57
2022-03-25 06:17:28,921	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 57, '_timesteps_total': 717400, '_time_total': 37638.89301943779, '_episodes_total': 2132}


agent_timesteps_total: 723258
custom_metrics: {}
date: 2022-03-25_06-17-28
done: false
episode_len_mean: 170.09
episode_media: {}
episode_reward_max: 38.39537439857698
episode_reward_mean: -2.947462405276438
episode_reward_min: -9.201298696020745
episodes_this_iter: 41
episodes_total: 2132
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 275.6507568359375
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 2.398540496826172
        var_gnorm: 46.60296630859375
        vf_explained_var: 0.8062713146209717
        vf_loss: 0.028857627883553505
  learner_queue:
    size_count: 950
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 723258
  num_steps_sampled: 361629
  num_steps_trained: 717400

86 blue_1 DOWN
111 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 111 -1.100999031252527 -1.4134558628441716
blue_1 False False 111 -0.10100166458586045 -1.30501122965219
-------------------------- Scene: 0 --------------------------
202 blue_1 DOWN
236 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 236 -1.101000278396837 -4.092663686116116
blue_1 False False 236 -0.1010017117301702 -2.326138074349604
-------------------------- Scene: 0 --------------------------
65 blue_1 DOWN
77 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 77 -1.1009943888301557 -1.507073028725733
blue_1 False False 77 -0.10100205549682227 -1.508200708563059
-------------------------- Scene: 0 --------------------------
58 blue_0 DOWN
92 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 92 -0.10100123961533441 -1.3050772450186179
blue_1 False False 92 -1.1010062396153344 -1.4071736058402715
-------------------------- Scene: 0 --------------------------
44 blue_0 DOWN
97 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 9

98 blue_1 DOWN
133 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 133 -1.1010019782564548 -1.3141318149808496
blue_1 False False 133 -0.10100201158978817 -1.3051113529559513
-------------------------- Scene: 0 --------------------------
107 blue_1 DOWN
120 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 120 -1.1010030235353034 -1.3055245481830025
blue_1 False False 120 -0.10100159020197019 -1.344692460940243
-------------------------- Scene: 0 --------------------------
189 blue_1 DOWN
199 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 199 -1.1009969588570494 -1.4405147877148001
blue_1 False False 199 -0.10100175885704939 -2.339577689493382
-------------------------- Scene: 0 --------------------------
76 blue_1 DOWN
200 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 200 -1.101000681786627 -1.7450349999996937
blue_1 False False 200 -0.10100104845329363 -1.3041370067152416
-------------------------- Scene: 0 --------------------------
47 blue_1 DOWN
256 blue_0 DOWN
TIME LIMIT LOSE
blue_0

2022-03-25 06:30:20,572	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000058\checkpoint-58
2022-03-25 06:30:20,572	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 58, '_timesteps_total': 729926, '_time_total': 38340.24009466171, '_episodes_total': 2181}


agent_timesteps_total: 735584
custom_metrics: {}
date: 2022-03-25_06-30-20
done: false
episode_len_mean: 145.84
episode_media: {}
episode_reward_max: 38.39537439857698
episode_reward_mean: -2.778936571120728
episode_reward_min: -9.201298696020745
episodes_this_iter: 49
episodes_total: 2181
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 280.32635498046875
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.24567994475364685
        var_gnorm: 46.644134521484375
        vf_explained_var: 0.15989530086517334
        vf_loss: 0.0002027818700298667
  learner_queue:
    size_count: 970
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 735584
  num_steps_sampled: 367792
  num_steps_trained:

97 blue_1 DOWN
115 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 115 -1.101009245300628 -1.4082332320706712
blue_1 False False 115 -0.10100307863396137 -1.3050808459239231
-------------------------- Scene: 0 --------------------------
78 blue_0 DOWN
119 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 119 -0.10100168902374598 -1.3071699061688975
blue_1 False False 119 -1.1009994223570794 -1.4060578315666672
-------------------------- Scene: 0 --------------------------
68 blue_1 DOWN
90 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 90 -1.1010020644548846 -1.3052444771175598
blue_1 False False 90 -0.10100143112155129 -1.30520182857063
-------------------------- Scene: 0 --------------------------
44 blue_0 DOWN
122 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 122 -0.10100131363819809 -1.2040014472156204
blue_1 False False 122 -1.1010036803048648 -1.3151980978762463
-------------------------- Scene: 0 --------------------------
51 blue_1 DOWN
56 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False

41 blue_0 DOWN
47 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 47 -0.10100059090812363 -1.303968688842072
blue_1 False False 47 -1.1010125909081236 -1.3057005343091812
-------------------------- Scene: 0 --------------------------
68 blue_1 DOWN
83 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 83 -1.1010068966736986 -1.3050285210278874
blue_1 False False 83 -0.10100173000703182 -1.305262403649279
-------------------------- Scene: 0 --------------------------
83 blue_0 DOWN
442 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 442 -0.10100082677365242 -1.3052496779437666
blue_1 False False 442 -0.7049889267736524 2.7289351818757863
-------------------------- Scene: 0 --------------------------


2022-03-25 06:42:41,092	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000059\checkpoint-59
2022-03-25 06:42:41,092	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 59, '_timesteps_total': 742452, '_time_total': 39005.662821769714, '_episodes_total': 2222}


agent_timesteps_total: 747840
custom_metrics: {}
date: 2022-03-25_06-42-40
done: false
episode_len_mean: 134.63
episode_media: {}
episode_reward_max: 1.4236855039320202
episode_reward_mean: -3.077201171423227
episode_reward_min: -7.397291248542034
episodes_this_iter: 41
episodes_total: 2222
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 285.3096923828125
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: 0.004933318123221397
        var_gnorm: 46.69779968261719
        vf_explained_var: 0.44407692551612854
        vf_loss: 0.0001683130394667387
  learner_queue:
    size_count: 990
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 747840
  num_steps_sampled: 373920
  num_st

75 blue_0 DOWN
90 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 90 -0.10100163689704207 -1.3051288108493926
blue_1 False False 90 -1.1009999702303754 -1.4060229519573724
-------------------------- Scene: 0 --------------------------
49 blue_1 DOWN
55 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 55 -1.1009921696416445 -1.4060115662113497
blue_1 False False 55 -0.10100213630831108 -1.304834640260157
-------------------------- Scene: 0 --------------------------
55 blue_0 DOWN
332 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 332 -0.10100123714581169 -1.3049912379698947
blue_1 False False 332 -1.1010055371458116 -4.515543185473456
-------------------------- Scene: 0 --------------------------
82 blue_1 DOWN
152 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 152 -1.1010048911271522 -1.354762555115376
blue_1 False False 152 -0.10100042446048552 -1.405836423472473
-------------------------- Scene: 0 --------------------------
59 blue_0 DOWN
72 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fal

2022-03-25 06:54:19,202	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000060\checkpoint-60
2022-03-25 06:54:19,202	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 60, '_timesteps_total': 754978, '_time_total': 39628.93854546547, '_episodes_total': 2253}


agent_timesteps_total: 760452
custom_metrics: {}
date: 2022-03-25_06-54-18
done: false
episode_len_mean: 164.38
episode_media: {}
episode_reward_max: 63.94030351848543
episode_reward_mean: -2.1206856629041395
episode_reward_min: -7.397291248542034
episodes_this_iter: 31
episodes_total: 2253
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 290.7134704589844
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.00001525878906
        model: {}
        policy_loss: 0.010205412283539772
        var_gnorm: 46.761985778808594
        vf_explained_var: 0.5471299290657043
        vf_loss: 0.00014229078078642488
  learner_queue:
    size_count: 1010
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 760452
  num_steps_sampled: 380226
  num_

73 blue_1 DOWN
406 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 406 -1.1010022229449918 -3.024503185601441
blue_1 False False 406 -0.1010013896116584 -1.4058691570972648
-------------------------- Scene: 0 --------------------------
67 blue_0 DOWN
115 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 115 -0.10100186027646756 -1.305055454015972
blue_1 False False 115 -1.1010011269431341 -1.3674586019798274
-------------------------- Scene: 0 --------------------------
84 blue_1 DOWN
86 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 86 -1.101004097601903 -1.3061067890277076
blue_1 False False 86 -0.10100166426856971 -1.3050820025745238
-------------------------- Scene: 0 --------------------------
121 blue_0 DOWN
170 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 170 -0.1010022114133373 -1.4163497971813594
blue_1 False False 170 -1.101005778080004 -1.320892467233811
-------------------------- Scene: 0 --------------------------
59 blue_0 DOWN
195 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False F

2022-03-25 07:06:15,767	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000061\checkpoint-61
2022-03-25 07:06:15,767	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 61, '_timesteps_total': 767504, '_time_total': 40272.06237077713, '_episodes_total': 2288}


agent_timesteps_total: 772938
custom_metrics: {}
date: 2022-03-25_07-06-15
done: false
episode_len_mean: 177.76
episode_media: {}
episode_reward_max: 130.5677069622673
episode_reward_mean: -0.9083394508324321
episode_reward_min: -7.42191906914939
episodes_this_iter: 35
episodes_total: 2288
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 294.31842041015625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: 4.588958740234375
        var_gnorm: 46.80530548095703
        vf_explained_var: 0.8340868353843689
        vf_loss: 0.024528255686163902
  learner_queue:
    size_count: 1030
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 772938
  num_steps_sampled: 386469
  num_steps_trained: 7675

80 blue_0 DOWN
260 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 260 -0.10100220923571518 -1.3051762817821102
blue_1 False False 260 -1.1010020759023817 -1.752935209929244
-------------------------- Scene: 0 --------------------------
66 blue_0 DOWN
69 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 69 -0.10100135661095905 -1.305100866190212
blue_1 False False 69 -1.1010006899442923 -1.3063091934861706
-------------------------- Scene: 0 --------------------------
76 blue_1 DOWN
88 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 88 -1.1009963151441182 -1.6079960317346413
blue_1 False False 88 -0.10100054847745159 -1.305314629047076
-------------------------- Scene: 0 --------------------------
46 blue_1 DOWN
103 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 103 -1.1009988884821815 -2.121308750928387
blue_1 False False 103 -0.10100168848218148 -1.3049621916194745
-------------------------- Scene: 0 --------------------------
61 blue_0 DOWN
94 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fal

104 blue_0 DOWN
161 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 161 -0.10100193547806176 -1.406359010420541
blue_1 False False 161 -1.1010045688113952 -2.224905747993317
-------------------------- Scene: 0 --------------------------
53 blue_0 DOWN
88 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 88 -0.10100231407144794 -1.3050309326520655
blue_1 False False 88 -1.100999114071448 -1.3063679667260746
-------------------------- Scene: 0 --------------------------
83 blue_0 DOWN
89 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 89 -0.10100165536345636 -1.3051603982363735
blue_1 False False 89 -1.101004222030123 -1.4060976485348013
-------------------------- Scene: 0 --------------------------
153 blue_1 DOWN
498 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 498 -1.1050040048690615 -4.62666899175724
blue_1 False False 498 -0.10100127153572803 -1.3265814378347083
-------------------------- Scene: 0 --------------------------


2022-03-25 07:19:10,197	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000062\checkpoint-62
2022-03-25 07:19:10,197	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 62, '_timesteps_total': 780030, '_time_total': 40973.376739025116, '_episodes_total': 2330}


agent_timesteps_total: 786036
custom_metrics: {}
date: 2022-03-25_07-19-09
done: false
episode_len_mean: 171.91
episode_media: {}
episode_reward_max: 130.5677069622673
episode_reward_mean: -1.5857405388764527
episode_reward_min: -7.42191906914939
episodes_this_iter: 42
episodes_total: 2330
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 299.08587646484375
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: -0.39554959535598755
        var_gnorm: 46.84488296508789
        vf_explained_var: 0.07986727356910706
        vf_loss: 0.00020659278379753232
  learner_queue:
    size_count: 1050
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 786036
  num_steps_sampled: 393018
  num_

46 blue_1 DOWN
119 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 119 -1.1010022046667964 -1.5070872278308662
blue_1 False False 119 -0.1010019380001298 -1.2041666841093255
-------------------------- Scene: 0 --------------------------
79 blue_1 DOWN
117 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 117 -1.1010057450245863 -2.32326481494604
blue_1 False False 117 -0.10100211169125291 -1.4060733903848308
-------------------------- Scene: 0 --------------------------
72 blue_0 DOWN
118 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 118 -0.1010012486352589 -1.3050663254342052
blue_1 False False 118 -1.1010070819685922 -2.2355400423976786
-------------------------- Scene: 0 --------------------------
72 blue_0 DOWN
391 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 391 -0.10100150147463276 -1.3051953833068468
blue_1 False False 391 -1.101000434807966 -5.440353417960104
-------------------------- Scene: 0 --------------------------
133 blue_1 DOWN
390 blue_0 DOWN
TIME LIMIT LOSE
blue_0 Fal

2022-03-25 07:31:04,833	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000063\checkpoint-63
2022-03-25 07:31:04,833	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 63, '_timesteps_total': 792556, '_time_total': 41615.783630132675, '_episodes_total': 2367}


agent_timesteps_total: 798212
custom_metrics: {}
date: 2022-03-25_07-31-04
done: false
episode_len_mean: 167.99
episode_media: {}
episode_reward_max: 130.5677069622673
episode_reward_mean: -1.9180944078740638
episode_reward_min: -7.167723260998313
episodes_this_iter: 37
episodes_total: 2367
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 304.01708984375
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.9999885559082
        model: {}
        policy_loss: -0.11003430187702179
        var_gnorm: 46.89179229736328
        vf_explained_var: 0.2717781960964203
        vf_loss: 0.0002858102379832417
  learner_queue:
    size_count: 1070
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 798212
  num_steps_sampled: 399106
  num_steps

55 blue_1 DOWN
123 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 123 -1.10100323564878 -1.3049475551350054
blue_1 False False 123 -0.10100143564877996 -1.3046932071424329
-------------------------- Scene: 0 --------------------------
47 blue_1 DOWN
556 blue_0 Shoot at red_0 launch distance : 59704.907508320954 True True
624 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 624 -0.6030074653709618 42.6160772160103
blue_1 False False 624 -0.10099973203762852 -1.3041734962928526
-------------------------- Scene: 0 --------------------------
53 blue_1 DOWN
114 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 114 -1.1010063981759384 -1.3054909849429266
blue_1 False False 114 -0.101001664842605 -1.406111905840012
-------------------------- Scene: 0 --------------------------
206 blue_0 DOWN
398 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 398 -0.10100175882917194 -1.4064312616210588
blue_1 False False 398 -1.1009993254958386 -3.247602820779001
-------------------------- Scene: 0 --------------

2022-03-25 07:42:35,254	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000064\checkpoint-64
2022-03-25 07:42:35,254	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 64, '_timesteps_total': 805082, '_time_total': 42234.0115981102, '_episodes_total': 2398}


agent_timesteps_total: 810440
custom_metrics: {}
date: 2022-03-25_07-42-35
done: false
episode_len_mean: 174.6
episode_media: {}
episode_reward_max: 41.311903719717435
episode_reward_mean: -2.750049755613212
episode_reward_min: -7.6562301151217715
episodes_this_iter: 31
episodes_total: 2398
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 309.260498046875
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: -0.036534156650304794
        var_gnorm: 46.95006561279297
        vf_explained_var: 0.478025883436203
        vf_loss: 0.00011073832865804434
  learner_queue:
    size_count: 1090
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 810440
  num_steps_sampled: 405220
  num_s

68 blue_0 DOWN
88 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 88 -0.10100154753998308 -1.4059433356719817
blue_1 False False 88 -1.1009978808733165 -1.507241084030791
-------------------------- Scene: 0 --------------------------
71 blue_1 DOWN
162 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 162 -1.1010035390061967 -1.338563147210417
blue_1 False False 162 -0.10100210567286347 -1.2036570927267167
-------------------------- Scene: 0 --------------------------
117 blue_1 DOWN
142 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 142 -1.101003991825011 -1.5075100432644648
blue_1 False False 142 -0.10100165849167769 -1.3061042434861654
-------------------------- Scene: 0 --------------------------
47 blue_0 DOWN
79 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 79 -0.10100024093456068 -1.4060206105935589
blue_1 False False 79 -1.1009884076012273 -1.40588783667454
-------------------------- Scene: 0 --------------------------
45 blue_1 DOWN
89 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False Fal

71 blue_1 DOWN
310 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 310 -1.1009952677621928 -3.0665356453852572
blue_1 False False 310 -0.10100166776219276 -1.3062877654951701
-------------------------- Scene: 0 --------------------------


2022-03-25 07:54:55,184	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000065\checkpoint-65
2022-03-25 07:54:55,184	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 65, '_timesteps_total': 817608, '_time_total': 42900.19008779526, '_episodes_total': 2437}


agent_timesteps_total: 823028
custom_metrics: {}
date: 2022-03-25_07-54-54
done: false
episode_len_mean: 172.35
episode_media: {}
episode_reward_max: 41.311903719717435
episode_reward_mean: -2.8812322881562817
episode_reward_min: -7.845150887756553
episodes_this_iter: 39
episodes_total: 2437
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 313.81402587890625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: 1.6366736888885498
        var_gnorm: 47.00169372558594
        vf_explained_var: 0.917201042175293
        vf_loss: 0.006264421157538891
  learner_queue:
    size_count: 1110
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 823028
  num_steps_sampled: 411514
  num_ste

112 blue_0 DOWN
122 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 122 -0.10100217117363909 -1.3055119035457032
blue_1 False False 122 -1.1010076711736392 -1.30516129958428
-------------------------- Scene: 0 --------------------------
124 blue_0 DOWN
212 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 212 -0.10100168910421709 -1.6302396677141946
blue_1 False False 212 -1.101004489104217 -2.3320342553942965
-------------------------- Scene: 0 --------------------------
70 blue_1 DOWN
83 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 83 -1.1010071228475342 -1.5072089397545039
blue_1 False False 83 -0.10100232284753413 -1.3051710375806427
-------------------------- Scene: 0 --------------------------
105 blue_0 DOWN
231 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 231 -0.10100204951506613 -1.30527346541524
blue_1 False False 231 -1.101003549515066 -2.4456692259235973
-------------------------- Scene: 0 --------------------------
46 blue_0 DOWN
200 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fals

67 blue_1 DOWN
187 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 187 -1.1010020568283607 -1.9454555320752478
blue_1 False False 187 -0.1010008234950273 -1.709403753585073
-------------------------- Scene: 0 --------------------------
64 blue_0 DOWN
96 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 96 -0.10100039099597044 -1.3052125421325975
blue_1 False False 96 -1.1010002576626372 -1.519293309736725
-------------------------- Scene: 0 --------------------------
66 blue_0 DOWN
88 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 88 -0.10100125568850503 -1.3050891939324245
blue_1 False False 88 -1.101001755688505 -1.3048018838705304
-------------------------- Scene: 0 --------------------------
82 blue_1 DOWN
186 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 186 -1.100999272431498 -1.95111341830783
blue_1 False False 186 -0.10100187243149801 -1.3058278543848356
-------------------------- Scene: 0 --------------------------
44 blue_0 DOWN
135 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False

2022-03-25 08:07:11,330	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000066\checkpoint-66
2022-03-25 08:07:11,330	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 66, '_timesteps_total': 830134, '_time_total': 43562.946509599686, '_episodes_total': 2480}


agent_timesteps_total: 835034
custom_metrics: {}
date: 2022-03-25_08-07-11
done: false
episode_len_mean: 163.02
episode_media: {}
episode_reward_max: 41.311903719717435
episode_reward_mean: -2.6804189841260255
episode_reward_min: -7.845150887756553
episodes_this_iter: 43
episodes_total: 2480
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 318.64605712890625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: -0.2278943508863449
        var_gnorm: 47.05201721191406
        vf_explained_var: 0.2107548713684082
        vf_loss: 0.00021464753081090748
  learner_queue:
    size_count: 1130
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 835034
  num_steps_sampled: 417517
  num

114 blue_0 DOWN
165 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 165 -0.10100232165347901 -1.3063968216155528
blue_1 False False 165 -1.1010044549868123 -3.542541437443804
-------------------------- Scene: 0 --------------------------
45 blue_1 DOWN
118 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 118 -1.1009950295427817 -1.406284349695938
blue_1 False False 118 -0.10100166287611498 -1.305261811397138
-------------------------- Scene: 0 --------------------------
87 blue_0 DOWN
131 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 131 -0.10100166227835429 -1.418324298600689
blue_1 False False 131 -1.1010047956116877 -1.304991767029126
-------------------------- Scene: 0 --------------------------
75 blue_1 DOWN
167 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 167 -1.100997639595451 -3.9975960938627537
blue_1 False False 167 -0.10100177292878436 -1.3049398077436154
-------------------------- Scene: 0 --------------------------
79 blue_0 DOWN
101 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fal

127 blue_1 DOWN
155 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 155 -1.1010080556203403 -1.5255228447667457
blue_1 False False 155 -0.10100065562034038 -1.332219117834323
-------------------------- Scene: 0 --------------------------
64 blue_1 DOWN
91 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 91 -1.1010010976256792 -1.3051468033250493
blue_1 False False 91 -0.10100183095901259 -1.3047925609732904
-------------------------- Scene: 0 --------------------------
50 blue_0 DOWN
69 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 69 -0.10100166118230669 -1.4050146864439084
blue_1 False False 69 -1.1009946278489733 -1.304846950096238
-------------------------- Scene: 0 --------------------------
77 blue_0 DOWN
77 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 77 -1.1010000566413045 -1.3062809622171845
blue_1 False False 77 -1.1010063233079712 -1.3050712288838513
-------------------------- Scene: 0 --------------------------
60 blue_0 DOWN
77 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fals

2022-03-25 08:20:03,035	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000067\checkpoint-67
2022-03-25 08:20:03,035	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 67, '_timesteps_total': 842160, '_time_total': 44260.09501457214, '_episodes_total': 2527}


agent_timesteps_total: 847304
custom_metrics: {}
date: 2022-03-25_08-20-02
done: false
episode_len_mean: 138.42
episode_media: {}
episode_reward_max: -2.407562775569291
episode_reward_mean: -3.0698843683193173
episode_reward_min: -6.133824664930565
episodes_this_iter: 47
episodes_total: 2527
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 323.6451110839844
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.0
        model: {}
        policy_loss: -0.12080764025449753
        var_gnorm: 47.11096954345703
        vf_explained_var: 0.36848512291908264
        vf_loss: 0.00020771574054379016
  learner_queue:
    size_count: 1150
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 847304
  num_steps_sampled: 423652
  num_steps_traine

104 blue_0 DOWN
116 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 116 -0.10100036613163715 -1.4141205246820918
blue_1 False False 116 -1.1009952661316371 -1.2045572273905778
-------------------------- Scene: 0 --------------------------
92 blue_0 DOWN
116 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 116 -0.10100165697781009 -1.3052945771703381
blue_1 False False 116 -1.10100205697781 -1.3149085976342487
-------------------------- Scene: 0 --------------------------
58 blue_1 DOWN
155 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 155 -1.101009498673045 -1.3050829991361845
blue_1 False False 155 -0.10100119867304506 -1.4057761923561933
-------------------------- Scene: 0 --------------------------
48 blue_0 DOWN
78 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 78 -0.10100188050146276 -1.3049875561294928
blue_1 False False 78 -1.1010016471681294 -1.305398837019184
-------------------------- Scene: 0 --------------------------
92 blue_0 DOWN
100 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fals

2022-03-25 08:32:19,404	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000068\checkpoint-68
2022-03-25 08:32:19,404	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 68, '_timesteps_total': 854186, '_time_total': 44921.92763590813, '_episodes_total': 2565}


agent_timesteps_total: 859776
custom_metrics: {}
date: 2022-03-25_08-32-19
done: false
episode_len_mean: 143.95
episode_media: {}
episode_reward_max: -2.407562775569291
episode_reward_mean: -3.239518712051855
episode_reward_min: -9.730866131807193
episodes_this_iter: 38
episodes_total: 2565
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 328.88275146484375
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: 0.012218920513987541
        var_gnorm: 47.179874420166016
        vf_explained_var: 0.48833826184272766
        vf_loss: 0.00020417789346538484
  learner_queue:
    size_count: 1170
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 859776
  num_steps_sampled: 429888
  nu

73 blue_1 DOWN
107 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 107 -1.101003871851262 -1.3051104249447991
blue_1 False False 107 -0.10100070518459539 -1.3084694918756212
-------------------------- Scene: 0 --------------------------
47 blue_1 DOWN
70 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 70 -1.1010006398980638 -1.3041218649828532
blue_1 False False 70 -0.10100160656473063 -1.508012988485546
-------------------------- Scene: 0 --------------------------
72 blue_0 DOWN
82 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 82 -0.10100166276272037 -1.3041077411177064
blue_1 False False 82 -1.1010011960960537 -1.3048893142905371
-------------------------- Scene: 0 --------------------------
106 blue_0 DOWN
118 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 118 -0.10100166530388464 -1.4062191704779932
blue_1 False False 118 -1.1009989653038847 -1.9192230827706607
-------------------------- Scene: 0 --------------------------
96 blue_0 DOWN
120 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False

111 blue_0 DOWN
228 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 228 -0.10100156597290198 -1.3052545132269433
blue_1 False False 228 -1.1009971326395687 -3.264898903390816
-------------------------- Scene: 0 --------------------------
80 blue_1 DOWN
128 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 128 -1.1009997638443438 -1.3050799366274244
blue_1 False False 128 -0.10100166384434382 -1.306243068363385
-------------------------- Scene: 0 --------------------------
132 blue_1 DOWN
153 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 153 -1.1010072725283637 -2.5252291183503197
blue_1 False False 153 -0.10100113919503037 -2.0200686867537656
-------------------------- Scene: 0 --------------------------
128 blue_0 DOWN
133 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 133 -0.10100181093767413 -1.3051070970764713
blue_1 False False 133 -1.101000710937674 -2.44490708025725
-------------------------- Scene: 0 --------------------------


2022-03-25 08:44:55,741	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000069\checkpoint-69
2022-03-25 08:44:55,741	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 69, '_timesteps_total': 866212, '_time_total': 45607.741585969925, '_episodes_total': 2607}


agent_timesteps_total: 872460
custom_metrics: {}
date: 2022-03-25_08-44-55
done: false
episode_len_mean: 152.25
episode_media: {}
episode_reward_max: -2.408204564832502
episode_reward_mean: -3.3697257878966376
episode_reward_min: -9.730866131807193
episodes_this_iter: 42
episodes_total: 2607
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 334.2056884765625
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99998474121094
        model: {}
        policy_loss: -0.054877832531929016
        var_gnorm: 47.24470520019531
        vf_explained_var: 0.325394868850708
        vf_loss: 0.000227640091907233
  learner_queue:
    size_count: 1190
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 872460
  num_steps_sampled: 436230
  num_st

74 blue_0 DOWN
99 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 99 -0.1010018060726058 -1.3052405078258236
blue_1 False False 99 -1.1009954394059391 -1.205232760906815
-------------------------- Scene: 0 --------------------------
80 blue_1 DOWN
90 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 90 -1.1010006880879717 -1.4073852834237708
blue_1 False False 90 -0.1010016214213051 -1.306087039486733
-------------------------- Scene: 0 --------------------------
64 blue_1 DOWN
158 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 158 -1.1009996647427627 -3.169409284259137
blue_1 False False 158 -0.10100166474276272 -1.4072666373747709
-------------------------- Scene: 0 --------------------------
110 blue_1 DOWN
288 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 288 -1.101001827803119 -4.886398579671656
blue_1 False False 288 -0.10100166113645245 -1.3053916838709512
-------------------------- Scene: 0 --------------------------
105 blue_0 DOWN
109 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False Fal

53 blue_0 DOWN
96 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 96 -0.10100166472450814 -1.3050586735212062
blue_1 False False 96 -1.1010005313911748 -1.305974342530624
-------------------------- Scene: 0 --------------------------
56 blue_1 DOWN
59 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 59 -1.1010048308234848 -1.3041509299932783
blue_1 False False 59 -0.1010011308234848 -1.406924544969411
-------------------------- Scene: 0 --------------------------
114 blue_0 DOWN
145 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 145 -0.10100176788694298 -1.3049640313201343
blue_1 False False 145 -1.101000267886943 -2.628030674552987
-------------------------- Scene: 0 --------------------------
94 blue_0 DOWN
120 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 120 -0.10100151305451201 -1.4061396942884696
blue_1 False False 120 -1.1010040797211786 -2.6290334532634505
-------------------------- Scene: 0 --------------------------


2022-03-25 08:57:18,717	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000070\checkpoint-70
2022-03-25 08:57:18,717	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 70, '_timesteps_total': 878238, '_time_total': 46278.32763361931, '_episodes_total': 2649}


agent_timesteps_total: 884640
custom_metrics: {}
date: 2022-03-25_08-57-18
done: false
episode_len_mean: 151.2
episode_media: {}
episode_reward_max: -2.408204564832502
episode_reward_mean: -3.3950125308116013
episode_reward_min: -9.730866131807193
episodes_this_iter: 42
episodes_total: 2649
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 339.3589782714844
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.99999237060547
        model: {}
        policy_loss: 0.006296154577285051
        var_gnorm: 47.308807373046875
        vf_explained_var: 0.44380447268486023
        vf_loss: 0.0001588903833180666
  learner_queue:
    size_count: 1210
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 884640
  num_steps_sampled: 442320
  num_

93 blue_1 DOWN
452 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 452 -1.1010001924098094 -2.4142781686945844
blue_1 False False 452 -0.10100165907647604 -1.3063215147413159
-------------------------- Scene: 0 --------------------------


  a = s.low + (a + 1.0) * (s.high - s.low) / 2.0


136 blue_0 DOWN
248 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 248 -0.1010012959835336 -1.5072993211859902
blue_1 False False 248 -1.1010106293168669 -1.627803977598938
-------------------------- Scene: 0 --------------------------
88 blue_1 DOWN
264 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 264 -1.1010034937863775 -2.394379294710893
blue_1 False False 264 -0.1010017937863776 -1.5070456827893894
-------------------------- Scene: 0 --------------------------
50 blue_0 DOWN
78 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 78 -0.10100166243586325 -1.3050203864523413
blue_1 False False 78 -1.1010007957691965 -2.5290303674993755
-------------------------- Scene: 0 --------------------------
44 blue_1 DOWN
49 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 49 -1.1010029309290452 -1.2040831306428907
blue_1 False False 49 -0.10100166426237864 -1.2050331768820128
-------------------------- Scene: 0 --------------------------
95 blue_0 DOWN
165 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False F

2022-03-25 09:09:26,192	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000071\checkpoint-71
2022-03-25 09:09:26,192	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 71, '_timesteps_total': 890264, '_time_total': 46931.39477968216, '_episodes_total': 2686}


agent_timesteps_total: 897094
custom_metrics: {}
date: 2022-03-25_09-09-25
done: false
episode_len_mean: 159.92
episode_media: {}
episode_reward_max: -2.409116307524904
episode_reward_mean: -3.324036818358963
episode_reward_min: -6.415107437044934
episodes_this_iter: 37
episodes_total: 2686
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 344.18829345703125
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 39.999996185302734
        model: {}
        policy_loss: -0.021598253399133682
        var_gnorm: 47.37265396118164
        vf_explained_var: 0.4286888539791107
        vf_loss: 0.00014484232815448195
  learner_queue:
    size_count: 1230
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 897094
  num_steps_sampled: 448547
  nu

93 blue_0 DOWN
148 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 148 -0.10100160592780966 -1.3057254399221385
blue_1 False False 148 -1.1010009725944763 -2.0213328291215786
-------------------------- Scene: 0 --------------------------
69 blue_1 DOWN
120 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 120 -1.1010051480148602 -1.3049404948064938
blue_1 False False 120 -0.1010014480148602 -1.3051855081125976
-------------------------- Scene: 0 --------------------------
47 blue_0 DOWN
54 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 54 -0.10100130890160798 -1.4060119453666942
blue_1 False False 54 -1.100995308901608 -1.4067008907638563
-------------------------- Scene: 0 --------------------------
64 blue_0 DOWN
107 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 107 -0.10100106507897243 -1.3047562116438864
blue_1 False False 107 -1.101000231745639 -1.5070601213511485
-------------------------- Scene: 0 --------------------------
76 blue_0 DOWN
311 blue_1 DOWN
TIME LIMIT LOSE
blue_0 Fals

71 blue_0 DOWN
132 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 132 -0.10100104869705992 -1.3041600788986063
blue_1 False False 132 -1.1010009820303932 -1.9218482495307578
-------------------------- Scene: 0 --------------------------
119 blue_0 DOWN
168 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 168 -0.10100158133290812 -2.028238689575451
blue_1 False False 168 -1.1009980146662415 -1.4102458833416878
-------------------------- Scene: 0 --------------------------
126 blue_1 DOWN
147 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 147 -1.1009981301914096 -2.749219160603314
blue_1 False False 147 -0.10100166352474298 -1.4056784654225896
-------------------------- Scene: 0 --------------------------
180 blue_0 DOWN
415 blue_1 DOWN
TIME LIMIT LOSE
blue_0 False False 415 -0.10100169770615015 -3.8532419815013976
blue_1 False False 415 -1.1009964643728167 -4.134660114456197
-------------------------- Scene: 0 --------------------------


2022-03-25 09:21:52,838	INFO trainable.py:473 -- Restored on 127.0.0.1 from checkpoint: ./UCAV/checkpoints/test_2\checkpoint_000072\checkpoint-72
2022-03-25 09:21:52,838	INFO trainable.py:480 -- Current state after restoring: {'_iteration': 72, '_timesteps_total': 902290, '_time_total': 47602.57448410988, '_episodes_total': 2728}


agent_timesteps_total: 909368
custom_metrics: {}
date: 2022-03-25_09-21-52
done: false
episode_len_mean: 153.05
episode_media: {}
episode_reward_max: -2.409116307524904
episode_reward_mean: -3.3055807667653108
episode_reward_min: -7.987902095957593
episodes_this_iter: 42
episodes_total: 2728
experiment_id: 30fb3b938b8d400eb6c652f779c9e66b
hostname: DESKTOP-PBQOQLL
info:
  learner:
    default_policy:
      custom_metrics: {}
      learner_stats:
        cur_lr: 0.0002500000118743628
        entropy: 349.22674560546875
        entropy_coeff: 0.009999999776482582
        grad_gnorm: 40.000003814697266
        model: {}
        policy_loss: -0.07647603005170822
        var_gnorm: 47.43212890625
        vf_explained_var: 0.41274115443229675
        vf_loss: 0.00016993666940834373
  learner_queue:
    size_count: 1250
    size_mean: 0.0
    size_quantiles:
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    - 0.0
    size_std: 0.0
  num_agent_steps_sampled: 909368
  num_steps_sampled: 454684
  num_

2022-03-25 09:21:56,461	ERROR tf_run_builder.py:47 -- Error fetching: [{'fire': <tf.Tensor 'blue_1/cond_1/Merge:0' shape=(?,) dtype=int64>, 'tgt_id': <tf.Tensor 'blue_1/cond_1/Merge_1:0' shape=(?,) dtype=int64>, 'vector_gam_x': <tf.Tensor 'blue_1/cond_1/Merge_2:0' shape=(?, 1) dtype=float32>, 'vector_psi_x': <tf.Tensor 'blue_1/cond_1/Merge_3:0' shape=(?, 1) dtype=float32>, 'vector_psi_y': <tf.Tensor 'blue_1/cond_1/Merge_4:0' shape=(?, 1) dtype=float32>, 'velocity': <tf.Tensor 'blue_1/cond_1/Merge_5:0' shape=(?, 1) dtype=float32>}, <tf.Tensor 'blue_1/model_1/lstm/while/Exit_3:0' shape=(?, 256) dtype=float32>, <tf.Tensor 'blue_1/model_1/lstm/while/Exit_4:0' shape=(?, 256) dtype=float32>, {'action_prob': <tf.Tensor 'blue_1/Exp_4:0' shape=(?,) dtype=float32>, 'action_logp': <tf.Tensor 'blue_1/cond_2/Merge:0' shape=(?,) dtype=float32>, 'action_dist_inputs': <tf.Tensor 'blue_1/Reshape_119:0' shape=(?, 12) dtype=float32>}], feed_dict={<tf.Tensor 'blue_1/obs:0' shape=(?, 141) dtype=float32>: a

InvalidArgumentError: Graph execution error:

Detected at node 'blue_1/SparseSoftmaxCrossEntropyWithLogits_1/SparseSoftmaxCrossEntropyWithLogits' defined at (most recent call last):
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\runpy.py", line 193, in _run_module_as_main
      "__main__", mod_spec)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\runpy.py", line 85, in _run_code
      exec(code, run_globals)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\asyncio\base_events.py", line 539, in run_forever
      self._run_once()
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\asyncio\base_events.py", line 1775, in _run_once
      handle._run()
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\asyncio\events.py", line 88, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
      await self.process_one()
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelbase.py", line 446, in process_one
      await dispatch(*args)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
      await result
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
      reply_content = await reply_content
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 2902, in run_cell
      raw_cell, store_history, silent, shell_futures)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
      return runner(coro)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 3173, in run_cell_async
      interactivity=interactivity, compiler=compiler, result=result)
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\DMU\AppData\Local\Temp/ipykernel_32/631583542.py", line 129, in <module>
      os.path.expanduser("./" + PROJECT + "/logs"), TRIAL))
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\agents\trainer.py", line 729, in __init__
      sync_function_tpl)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\tune\trainable.py", line 122, in __init__
      self.setup(copy.deepcopy(self.config))
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\agents\trainer.py", line 831, in setup
      num_workers=self.config["num_workers"])
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\agents\trainer.py", line 1932, in _make_workers
      logdir=self.logdir,
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 131, in __init__
      spaces=spaces,
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 537, in _make_worker
      spaces=spaces,
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 592, in __init__
      seed=seed)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1556, in _build_policy_map
      conf, merged_conf)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\policy\policy_map.py", line 134, in create_policy
      observation_space, action_space, merged_config)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 252, in __init__
      get_batch_divisibility_req=get_batch_divisibility_req,
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 336, in __init__
      action_dist = dist_class(dist_inputs, self.model)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 509, in __init__
      split_inputs)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tree\__init__.py", line 510, in map_structure
      [func(*args) for args in zip(*map(flatten, structures))])
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tree\__init__.py", line 510, in <listcomp>
      [func(*args) for args in zip(*map(flatten, structures))])
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 508, in <lambda>
      lambda dist, input_: dist(input_, model), child_distributions,
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 62, in __init__
      super().__init__(inputs / temperature, model)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 29, in __init__
      self.sampled_action_logp_op = self.logp(self.sample_op)
    File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 71, in logp
      logits=self.inputs, labels=tf.cast(x, tf.int32))
Node: 'blue_1/SparseSoftmaxCrossEntropyWithLogits_1/SparseSoftmaxCrossEntropyWithLogits'
Received a label value of 2 which is outside the valid range of [0, 2).  Label values: 2
	 [[{{node blue_1/SparseSoftmaxCrossEntropyWithLogits_1/SparseSoftmaxCrossEntropyWithLogits}}]]

Original stack trace for 'blue_1/SparseSoftmaxCrossEntropyWithLogits_1/SparseSoftmaxCrossEntropyWithLogits':
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\traitlets\config\application.py", line 846, in launch_instance
    app.start()
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelapp.py", line 677, in start
    self.io_loop.start()
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\tornado\platform\asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\asyncio\base_events.py", line 539, in run_forever
    self._run_once()
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\asyncio\base_events.py", line 1775, in _run_once
    handle._run()
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
    await self.process_one()
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelbase.py", line 446, in process_one
    await dispatch(*args)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
    await result
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
    reply_content = await reply_content
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 2902, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
    return runner(coro)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 3173, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "C:\Users\DMU\AppData\Roaming\Python\Python37\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\DMU\AppData\Local\Temp/ipykernel_32/631583542.py", line 129, in <module>
    os.path.expanduser("./" + PROJECT + "/logs"), TRIAL))
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\agents\trainer.py", line 729, in __init__
    sync_function_tpl)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\tune\trainable.py", line 122, in __init__
    self.setup(copy.deepcopy(self.config))
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\agents\trainer.py", line 831, in setup
    num_workers=self.config["num_workers"])
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\agents\trainer.py", line 1932, in _make_workers
    logdir=self.logdir,
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 131, in __init__
    spaces=spaces,
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 537, in _make_worker
    spaces=spaces,
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 592, in __init__
    seed=seed)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1556, in _build_policy_map
    conf, merged_conf)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\policy\policy_map.py", line 134, in create_policy
    observation_space, action_space, merged_config)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 252, in __init__
    get_batch_divisibility_req=get_batch_divisibility_req,
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 336, in __init__
    action_dist = dist_class(dist_inputs, self.model)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 509, in __init__
    split_inputs)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tree\__init__.py", line 510, in map_structure
    [func(*args) for args in zip(*map(flatten, structures))])
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tree\__init__.py", line 510, in <listcomp>
    [func(*args) for args in zip(*map(flatten, structures))])
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 508, in <lambda>
    lambda dist, input_: dist(input_, model), child_distributions,
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 62, in __init__
    super().__init__(inputs / temperature, model)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 29, in __init__
    self.sampled_action_logp_op = self.logp(self.sample_op)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\ray\rllib\models\tf\tf_action_dist.py", line 71, in logp
    logits=self.inputs, labels=tf.cast(x, tf.int32))
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\util\traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\util\dispatch.py", line 1082, in op_dispatch_handler
    return dispatch_target(*args, **kwargs)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 4426, in sparse_softmax_cross_entropy_with_logits_v2
    labels=labels, logits=logits, name=name)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\util\traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\util\dispatch.py", line 1082, in op_dispatch_handler
    return dispatch_target(*args, **kwargs)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 4339, in sparse_softmax_cross_entropy_with_logits
    precise_logits, labels, name=name)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 4238, in _sparse_softmax_cross_entropy_with_rank_2_logits
    logits, labels, name=name)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 11339, in sparse_softmax_cross_entropy_with_logits
    labels=labels, name=name)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 742, in _apply_op_helper
    attrs=attr_protos, op_def=op_def)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\framework\ops.py", line 3784, in _create_op_internal
    op_def=op_def)
  File "C:\Users\DMU\miniconda3\envs\AI2\lib\site-packages\tensorflow\python\framework\ops.py", line 2175, in __init__
    self._traceback = tf_stack.extract_stack_for_node(self._c_op)


  self.func()
