In [1]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
import random
import torch
from torch import nn
from common.network import DuelingNetwork
from common.util import *

In [2]:
# Speed setting: which pursuer speed is used relative to the evader.
# Accepted values are "slow", "equal" or "fast".
speed = "fast"

# Epsilon handed to each agent's act(); 0 is labelled "greedy" when results are saved.
epsilon = 0

In [3]:
# Random seeds: the simulation below is repeated once per seed (1 through 10).
seeds = list(range(1, 11))
# Participant labels; each label becomes part of the saved result file name.
names = ["self_play"]

# ---------------------------------------------------------------------------
# Self-play evaluation: for every seed, run the trained pursuer and evader
# against each other and save positions, Q-values and hidden representations.
# ---------------------------------------------------------------------------

# Environment module: extend sys.path once (not once per seed) and import once.
env_dir = os.path.join(os.pardir, "c1ae")
if env_dir not in sys.path:
    sys.path.append(env_dir)
from chase1_and_escape import Chase1AndEscape

# Settings that do not change between seeds.
pursuer_speeds = {"fast": 3.6, "equal": 3.0, "slow": 2.4}
if speed not in pursuer_speeds:
    # Previously an unknown value left `speed_p` undefined (or stale from an
    # earlier run of the cell); fail loudly instead.
    raise ValueError('speed must be "slow", "equal" or "fast", got ' + repr(speed))
speed_p = pursuer_speeds[speed]
speed_e = 3
max_step_episode = 300
num_episodes_test = 100
device = torch.device("cpu")
model_dir = os.path.join(os.pardir, "model", "c1ae")
save_dir = "self_play_results"
name_policy = "greedy" if epsilon == 0 else "epsilon_greedy"


def hidden_streams(net, obs):
    """Return the post-ReLU activations of the first value/advantage layers.

    Args:
        net: network exposing ``forward_com``, ``fc_state`` and ``fc_advantage``;
            the first element of each stream must provide ``weight`` and ``bias``.
        obs: observation tensor already placed on the network's device.

    Returns:
        Tuple ``(rep_v, rep_a)`` of activation tensors.
    """
    common = net.forward_com(obs)
    value_layer = net.fc_state[0]
    advantage_layer = net.fc_advantage[0]
    rep_v = torch.relu(torch.matmul(common, value_layer.weight.T) + value_layer.bias)
    rep_a = torch.relu(torch.matmul(common, advantage_layer.weight.T) + advantage_layer.bias)
    return rep_v, rep_a


def to_saveable(episodes):
    """Convert per-episode records into an array that ``np.savez`` accepts.

    ``episodes`` is a list of ``[evader_steps, pursuer_steps]`` pairs. Episodes
    may differ in length, which makes the nesting ragged; recent NumPy versions
    refuse to build an array from ragged input unless ``dtype=object`` is given
    explicitly, so that dtype is used only when lengths actually differ.
    """
    lengths = {len(agent_steps) for episode in episodes for agent_steps in episode}
    dtype = object if len(lengths) > 1 else None
    return np.array(episodes, dtype=dtype)


def to_numpy(tensor):
    """Return an independent NumPy copy of ``tensor``."""
    return tensor.detach().cpu().numpy().copy()


for seed in seeds:

    # Participant
    for name in names:

        # Seed every random number generator in use.
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        # Device
        print(device)

        # Networks
        net_p = DuelingNetwork(10, 13).to(device)
        net_e = DuelingNetwork(10, 13).to(device)

        # Environment
        env = Chase1AndEscape(speed_pursuer=speed_p, speed_evader=speed_e, max_step=max_step_episode)

        # Load trained weights. map_location lets checkpoints that were saved
        # on another device be restored onto `device`.
        # NOTE(review): the evader checkpoint is also keyed by the pursuer
        # speed (as in the original code) -- confirm this naming is intended.
        net_p.load_state_dict(
            torch.load(os.path.join(model_dir, "p_" + str(speed_p) + ".pth"), map_location=device)
        )
        net_e.load_state_dict(
            torch.load(os.path.join(model_dir, "e_" + str(speed_p) + ".pth"), map_location=device)
        )

        # Simulation records; one entry per episode, ordered [evader, pursuer].
        rep_v_list = []
        rep_a_list = []
        q_list = []
        pos_list = []

        for episode in range(num_episodes_test):

            pursuer_rep_v_episode = []
            evader_rep_v_episode = []
            pursuer_rep_a_episode = []
            evader_rep_a_episode = []
            pursuer_q_episode = []
            evader_q_episode = []
            pursuer_pos_episode = []
            evader_pos_episode = []

            obs_p, obs_e = env.reset()
            obs_p, obs_e = torch.Tensor(obs_p), torch.Tensor(obs_e)
            done = False
            step_episode = 0

            while not done:

                obs_p_dev = obs_p.float().to(device)
                obs_e_dev = obs_e.float().to(device)

                # Pure inference: no autograd graph is needed for recording.
                with torch.no_grad():
                    feature_v_p, feature_a_p = hidden_streams(net_p, obs_p_dev)
                    feature_v_e, feature_a_e = hidden_streams(net_e, obs_e_dev)
                    q_p = net_p.forward(obs_p_dev)
                    q_e = net_e.forward(obs_e_dev)

                # Pursuer acts before evader, matching the original call order.
                action_p = net_p.act(obs_p_dev, epsilon)
                action_e = net_e.act(obs_e_dev, epsilon)

                next_obs_p, next_obs_e, reward_p, reward_e, done = env.step(action_p, action_e, step_episode)
                obs_p, obs_e = torch.Tensor(next_obs_p), torch.Tensor(next_obs_e)
                step_episode += 1

                # NOTE(review): positions are read after env.step(), whereas the
                # features and Q-values above come from the pre-step observation
                # -- confirm this pairing is what the downstream analysis expects.
                pos_p = env.pos_p
                pos_e = env.pos_e

                pursuer_rep_v_episode.append(to_numpy(feature_v_p))
                pursuer_rep_a_episode.append(to_numpy(feature_a_p))
                evader_rep_v_episode.append(to_numpy(feature_v_e))
                evader_rep_a_episode.append(to_numpy(feature_a_e))
                pursuer_q_episode.append(to_numpy(q_p))
                evader_q_episode.append(to_numpy(q_e))
                pursuer_pos_episode.append(np.array(pos_p))
                evader_pos_episode.append(np.array(pos_e))

            rep_v_list.append([evader_rep_v_episode, pursuer_rep_v_episode])
            rep_a_list.append([evader_rep_a_episode, pursuer_rep_a_episode])
            q_list.append([evader_q_episode, pursuer_q_episode])
            pos_list.append([evader_pos_episode, pursuer_pos_episode])

        # Save
        os.makedirs(save_dir, exist_ok=True)

        file_path = os.path.join(save_dir, "results_1on1_" + name + "_" +  speed + "_" + name_policy + "_seed_" + str(seed) + ".npz")
        np.savez(
            file_path,
            pos=to_saveable(pos_list),
            q=to_saveable(q_list),
            rep_v=to_saveable(rep_v_list),
            rep_a=to_saveable(rep_a_list),
        )


cpu


  val = np.asanyarray(val)


cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
