In [1]:
%matplotlib tk

import argparse
import gym
import datetime
import os
import random
import tempfile
import numpy as np
import pickle

import ray
from ray import tune
from ray.tune.logger import Logger, UnifiedLogger, pretty_print
from ray.rllib.env.multi_agent_env import make_multi_agent
from ray.rllib.examples.models.shared_weights_model import TF2SharedWeightsModel
from ray.rllib.models import ModelCatalog
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.ppo import ppo, PPOTrainer, PPOTFPolicy
from ray.rllib.agents.impala.vtrace_tf_policy import VTraceTFPolicy
from ray.rllib.agents.impala import impala, ImpalaTrainer
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.policy import PolicySpec
from environment_rllib_3d import MyEnv
#from test_env_for_lstm import MyEnv
from settings.initial_settings import *
from settings.reset_conditions import reset_conditions
#from modules.models import MyConv2DModel_v0B_Small_CBAM_1DConv_Share
#from modules.models import MyRNNUAVClass
#from modules.models import DenseNetModelLarge
from tensorflow.keras.utils import plot_model
from modules.savers import save_conditions
from utility.result_env import render_env
from utility.terminate_uavsimproc import teminate_proc
from utility.latest_learned_file_path import latest_learned_file_path
from utility.save_logs import save_logs_IMPALA
from utility.save_logs import save_hists
from utility.save_logs import save_env_info

import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf
import cv2
import ctypes
import warnings

# If a UCAV.exe simulator process is still running, kill it before starting.
teminate_proc.UAVsimprockill(proc_name="UCAV.exe")

# Silence the NumPy / Matplotlib deprecation warnings and keep printed arrays
# compact (3 decimals, no scientific notation for small values).
warnings.filterwarnings(action="ignore", category=np.VisibleDeprecationWarning)
warnings.filterwarnings(action="ignore", category=matplotlib.MatplotlibDeprecationWarning)
np.set_printoptions(suppress=True, precision=3)

# Experiment identity: used below to build the log / model / checkpoint /
# result paths.
PROJECT = "UCAV"
TRIAL_ID = 2
TRIAL = f"test_{TRIAL_ID}"

# Evaluation schedule: an evaluation round runs whenever
# `steps % EVAL_FREQ == 0`, and each round plays NUM_EVAL episodes.
EVAL_FREQ = 1
NUM_EVAL = 1

# When True, the trainer is restored from the latest checkpoint before training.
CONTINUAL = False
def custom_log_creator(custom_path, custom_str):
    """Build a ``logger_creator`` callback that logs below ``custom_path``.

    Args:
        custom_path: Directory under which the per-run log directory is
            created. It is created on demand, including missing parents.
        custom_str: Label that starts the name of the log directory.

    Returns:
        A function that takes the trainer ``config`` and returns a
        ``UnifiedLogger`` writing to a freshly created, uniquely named
        directory ``<custom_str>_<YYYY-mm-dd_HH-MM-SS><random suffix>``.
    """
    # The timestamp is taken once, when the factory is built, so every logger
    # created from the same factory shares the same prefix.
    timestr = datetime.datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    logdir_prefix = f"{custom_str}_{timestr}"

    def logger_creator(config):
        # exist_ok=True replaces the separate os.path.exists() test, which
        # could race with another process creating the same directory.
        os.makedirs(custom_path, exist_ok=True)
        logdir = tempfile.mkdtemp(prefix=logdir_prefix, dir=custom_path)
        return UnifiedLogger(config, logdir, loggers=None)

    return logger_creator

# Restart Ray from a clean state: shut down whatever an earlier run of this
# cell may have left behind, then initialise it again.
ray.shutdown()
ray.init(log_to_driver=False, ignore_reinit_error=True)

# Custom model registration is currently disabled:
#ModelCatalog.register_custom_model('my_model', MyRNNUAVClass)

# Environment instance used for the evaluation rollouts; it also supplies the
# observation and action spaces for the policy specs and the trainer config.
eval_env = MyEnv()

# One V-trace (IMPALA) policy per blue agent. Both use the same LSTM model
# settings, but each gets its own config dict. PPOTFPolicy was used in the
# same slots in an earlier variant of this notebook.
policies = {
    agent_name: (
        VTraceTFPolicy,
        eval_env.observation_space,
        eval_env.action_space,
        {"model": {"vf_share_layers": False, "use_lstm": True, "max_seq_len": 200}},
    )
    for agent_name in ("blue_0", "blue_1")
}
policy_ids = list(policies)

def policy_mapping_fn(agent_id, episode, **kwargs):
    """Return the policy id for ``agent_id``, which is the agent id itself.

    ``episode`` and any additional keyword arguments are accepted but not
    used.
    """
    return agent_id

# Trainer configuration. Only the overrides are listed: ImpalaTrainer merges
# them into its own defaults. The former `config = impala.DEFAULT_CONFIG.copy()`
# line was overwritten by this dict on the very next statement, so it was dead
# code and has been dropped.
config = {
    "env": MyEnv,
    "num_gpus": 0,
    "num_workers": 0,
    "num_cpus_per_worker": 0,
    "num_gpus_per_worker": 0,
    "train_batch_size": 600*5*2,
    "batch_mode": "complete_episodes",
    "gamma": 0.995,
    "lr": 2.5e-4,
    "observation_space": eval_env.observation_space,
    "action_space": eval_env.action_space,
    "explore": True,
    "rollout_fragment_length": 300,
    "num_sgd_iter": 20,
    "learner_queue_size": 300,
    "learner_queue_timeout": 900,
    "multiagent": {"policies": policies, "policy_mapping_fn": policy_mapping_fn},
    # Disabled alternatives, kept for reference:
    # "clip_actions": True, "normalize_actions": True,
    # "sgd_minibatch_size": 300,
    # "exploration_config": {"type": "StochasticSampling", "random_timesteps": 0},  # PPO default
    # "model": {"fcnet_activation": "relu", "fcnet_hiddens": [256, 256, 256],
    #           "post_fcnet_activation": "linear", "vf_share_layers": True},
    #           # also tried: "use_lstm": True, "lstm_cell_size": 256, "max_seq_len": 128
    # "model": {"custom_model": "my_model"},
}

# Label passed to the log savers.
#res_name = "sgd"+str(config["sgd_minibatch_size"])+"sgd_num"+str(config["num_sgd_iter"])+"lr"+str(config["lr"])+"gamma"+str(config["gamma"])
res_name = "test"

# Save the experiment conditions next to the other project outputs.
conditions_dir = './' + PROJECT + '/conditions/'
os.makedirs(conditions_dir, exist_ok=True)
save_conditions(conditions_dir)

# NOTE: PPOTrainer() somehow fails with a TensorFlow eager-mode error when
# try_import_tf is used.
trainer = impala.ImpalaTrainer(
    config=config,
    logger_creator=custom_log_creator("./" + PROJECT + "/logs", TRIAL),
)

# Checkpoint directory for this trial. It is defined before the optional
# restore so the restore path is derived from PROJECT/TRIAL instead of being
# hard-coded to one specific trial.
check_point_dir = os.path.join('./' + PROJECT + '/checkpoints/', TRIAL)
os.makedirs(check_point_dir, exist_ok=True)

if CONTINUAL:
    # Continual learning: resume from the most recent checkpoint of this trial.
    # model_path = PROJECT + '/checkpoints/' + TRIAL + '/checkpoint_000197/checkpoint-197'
    model_path = latest_learned_file_path(check_point_dir + '/*')
    trainer.restore(checkpoint_path=model_path)

# Dump a text summary and an architecture diagram of every policy's model.
models_dir = './' + PROJECT + '/models/'
os.makedirs(models_dir, exist_ok=True)
for policy_id in policy_ids:
    base_model = trainer.get_policy(policy_id).model.base_model

    text_name = models_dir + TRIAL + policy_id + '.txt'
    # newline="" stops text mode from translating the explicit "\r\n" again
    # (which would produce "\r\r\n" on Windows).
    with open(text_name, "w", newline="") as fp:
        base_model.summary(print_fn=lambda line: fp.write(line + "\r\n"))

    # The policy id is part of the file name; previously every policy wrote to
    # the same PNG, so only the last diagram survived.
    png_name = models_dir + TRIAL + policy_id + '.png'
    plot_model(base_model, to_file=png_name, show_shapes=True)

  for external in metadata.entry_points().get(self.group, []):

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Could not import from numba, which means that some
parts of this code may run MUCH more slowly.  You
may wish to install numba.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

2022-03-24 11:17:07,394	INFO trainer.py:2055 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
2022-03-24 11:17:07,394	INFO trainer.py:792 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [None]:
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  7 21:50:01 2022

@author: Takumi
"""
# Bring the evaluation environment into its initial state and store its
# settings.
eval_env.reset()
save_env_info(eval_env)

# Only relevant for the video-recording path, which is currently disabled.
record_mode = 0

results_dir = './' + PROJECT + '/results/'
os.makedirs(results_dir, exist_ok=True)
results_file = results_dir + TRIAL + '.pkl'

# Glob pattern for this trial's checkpoints, derived from check_point_dir
# rather than hard-coded, so it follows TRIAL_ID.
checkpoint_glob = check_point_dir + '/*'

for steps in range(10001):
    # ---- Training -------------------------------------------------------
    print(f'\n----------------- Training at steps:{steps} start! -----------------')
    eval_env.eval = False
    eval_env.reset()
    results = trainer.train()
    save_logs_IMPALA(res_name, results, steps, CONTINUAL)
    print(pretty_print(results))

    if steps % EVAL_FREQ != 0:
        continue

    # ---- Evaluation -----------------------------------------------------
    print(f'\n-------------- Evaluation at steps:{steps} starting ! --------------')
    check_point = trainer.save(checkpoint_dir=check_point_dir)

    for i in range(NUM_EVAL):
        # Reload the checkpoint that was just written before every episode.
        model_path = latest_learned_file_path(checkpoint_glob)
        trainer.restore(checkpoint_path=model_path)

        eval_env.eval = True
        obs = eval_env.reset()
        step_num = 0

        # Ask each policy for its own initial recurrent state instead of
        # hard-coding the LSTM cell size, so a changed "lstm_cell_size" cannot
        # get out of sync with the evaluation code.
        rnn_states = {
            policy_id: trainer.get_policy(policy_id).get_initial_state()
            for policy_id in policy_ids
        }

        while True:
            # Agent ids and policy ids are identical (see policy_mapping_fn).
            actions = {}
            for policy_id in policy_ids:
                # With a state passed in, the result is indexable as
                # (action, next state, ...).
                outcome = trainer.compute_single_action(
                    obs[policy_id],
                    state=rnn_states[policy_id],
                    prev_action=None,
                    prev_reward=None,
                    policy_id=policy_id,
                    explore=False,
                )
                actions[policy_id] = outcome[0]
                rnn_states[policy_id] = outcome[1]

            obs, rewards, dones, infos = eval_env.step(actions)

            # Accumulate the position histories of this episode.
            # NOTE(review): this relies on eval_env.timer being 1 after the
            # first step of an episode to (re)start the histories -- confirm
            # against the environment.
            env_blue_pos_temp_mod, env_red_pos_temp_mod, env_mrm_pos_temp_mod = render_env.copy_from_env_mod(eval_env)
            if eval_env.timer == 1:
                env_blue_pos_mod = env_blue_pos_temp_mod
                env_red_pos_mod = env_red_pos_temp_mod
                env_mrm_pos_mod = env_mrm_pos_temp_mod
            else:
                env_blue_pos_mod = np.vstack([env_blue_pos_mod, env_blue_pos_temp_mod])
                env_red_pos_mod = np.vstack([env_red_pos_mod, env_red_pos_temp_mod])
                env_mrm_pos_mod = np.vstack([env_mrm_pos_mod, env_mrm_pos_temp_mod])

            # NOTE: live plotting and cv2 video recording of the evaluation
            # are currently disabled.

            step_num += 1

            # End of episode: store the collected histories and stop.
            if dones['__all__']:
                save_hists("blue", steps, env_blue_pos_mod)
                save_hists("red", steps, env_red_pos_mod)
                save_hists("mrm", steps, env_mrm_pos_mod)
                break

ray.shutdown()

-------------------------- Scene: 0 --------------------------

----------------- Training at steps:0 start! -----------------
-------------------------- Scene: 0 --------------------------
-------------------------- Scene: 0 --------------------------
121 blue_1 DOWN
482 blue_0 Shoot at red_1 launch distance : 56372.93295488006 True True
493 blue_0 Shoot at red_1 launch distance : 52473.834320350965 True True
536 red_0 Shoot at blue_0
547 red_0 Shoot at blue_0
610 blue_0 Splash :red_1
640 blue_0: Destroyed
TIME LIMIT LOSE
blue_0 False False 640 -0.13994421051278277 113.31906618727442
blue_1 False False 640 -0.10099981051278278 -1.3071002848420847
-------------------------- Scene: 0 --------------------------
133 blue_1 DOWN
224 blue_0 DOWN
TIME LIMIT LOSE
blue_0 False False 224 -1.1010029311937106 -2.6646747510607898
blue_1 False False 224 -0.10100186452704389 -1.4181754768746053
-------------------------- Scene: 0 --------------------------
361 blue_1 DOWN
