In [4]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
import random
import torch
from torch import nn
from common.network import DuelingNetwork
from common.util import *

In [5]:
""" speed """
speed = "slow"  # pursuer speed setting: "slow", "equal" or "fast"

# Reward scheme label: "indiv" or "share".
reward_p = "share"

# Epsilon handed to each network's act() call during the rollouts;
# a value of 0 labels the saved results as "greedy".
epsilon = 0.1

In [6]:
""" seed """
# Roll out the trained pursuer/evader networks for every seed and store, per
# time step, the first-layer activations of the value and advantage branches,
# the Q-values and the agent positions in one .npz file per seed.
seeds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
names = ["self_play"]

# Make the environment package importable once, instead of appending the same
# path (and re-running the import) on every loop iteration.
env_dir = os.path.join(os.pardir, "c2ae")
if env_dir not in sys.path:
    sys.path.append(env_dir)
from chase2_and_escape import Chase2AndEscape

# Lookup tables for the configuration labels. An unknown label now fails
# loudly instead of leaving speed_p / share_reward undefined or stale.
PURSUER_SPEEDS = {"fast": 3.6, "equal": 3.0, "slow": 2.4}
REWARD_SHARING = {"indiv": False, "share": True}

if speed not in PURSUER_SPEEDS:
    raise ValueError(f"speed must be one of {sorted(PURSUER_SPEEDS)}, got {speed!r}")
if reward_p not in REWARD_SHARING:
    raise ValueError(f"reward_p must be one of {sorted(REWARD_SHARING)}, got {reward_p!r}")

speed_p = PURSUER_SPEEDS[speed]
speed_e = 3
share_reward = REWARD_SHARING[reward_p]

max_step_episode = 300
num_episodes_test = 100

name_policy = "greedy" if epsilon == 0 else "epsilon_greedy"

save_dir = "self_play_results"
os.makedirs(save_dir, exist_ok=True)

device = torch.device("cpu")

# Order in which the agents are evaluated each step, and order in which their
# records are stacked inside every saved episode (evader first).
AGENTS = ("p1", "p2", "e")
SAVE_ORDER = ("e", "p1", "p2")


def _first_layer_activation(branch, shared):
    """Return relu(shared @ W.T + b) for the first layer of `branch`."""
    layer = branch[0]
    return torch.relu(torch.matmul(shared, layer.weight.T) + layer.bias)


def _to_numpy(tensor):
    """Copy a tensor into a NumPy array that is detached from autograd."""
    return np.array(tensor.detach().cpu().numpy())


def _as_savable_array(nested):
    """Convert nested per-episode records into an array np.savez can store.

    Episode length is decided by the environment, so the nesting may be
    ragged. NumPy >= 1.24 raises ValueError for ragged input unless
    dtype=object is requested explicitly; older versions built the object
    array implicitly. Object arrays must be read back with
    np.load(..., allow_pickle=True).
    """
    try:
        return np.array(nested)
    except ValueError:
        return np.array(nested, dtype=object)


for seed in seeds:

    # participant
    for name in names:

        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        # device
        print(device)

        # Networks (built after seeding, in the same order as before).
        net_p1 = DuelingNetwork(18, 13).to(device)
        net_p2 = DuelingNetwork(18, 13).to(device)
        net_e = DuelingNetwork(18, 13).to(device)
        nets = {"p1": net_p1, "p2": net_p2, "e": net_e}

        # Environment
        env = Chase2AndEscape(
            speed_pursuer1=speed_p,
            speed_pursuer2=speed_p,
            speed_evader=speed_e,
            max_step=max_step_episode,
            reward_share=share_reward,
        )

        # Load trained weights; map_location lets checkpoints that were saved
        # from another device be restored onto `device`.
        model_dir = "../model/c2ae/reward_" + reward_p
        for key in AGENTS:
            weights_path = model_dir + "/" + key + "_" + str(speed_p) + ".pth"
            nets[key].load_state_dict(torch.load(weights_path, map_location=device))

        # Simulation
        rep_v_list = []
        rep_a_list = []
        q_list = []
        pos_list = []

        for _episode in range(num_episodes_test):

            rep_v = {key: [] for key in AGENTS}
            rep_a = {key: [] for key in AGENTS}
            q_vals = {key: [] for key in AGENTS}
            positions = {key: [] for key in AGENTS}

            obs_p1, obs_p2, obs_e = env.reset()
            obs_p1, obs_p2, obs_e = torch.Tensor(obs_p1), torch.Tensor(obs_p2), torch.Tensor(obs_e)
            done = False
            step_episode = 0

            # Pure inference: no autograd graph is needed for any of this.
            with torch.no_grad():
                while not done:

                    inputs = {
                        "p1": obs_p1.float().to(device),
                        "p2": obs_p2.float().to(device),
                        "e": obs_e.float().to(device),
                    }

                    # First-layer activations of the value / advantage branches.
                    step_v, step_a, step_q = {}, {}, {}
                    for key in AGENTS:
                        shared = nets[key].forward_com(inputs[key])
                        step_v[key] = _first_layer_activation(nets[key].fc_state, shared)
                        step_a[key] = _first_layer_activation(nets[key].fc_advantage, shared)

                    for key in AGENTS:
                        step_q[key] = nets[key].forward(inputs[key])

                    # Action selection keeps the original p1, p2, e call order.
                    action_p1 = net_p1.act(inputs["p1"], epsilon)
                    action_p2 = net_p2.act(inputs["p2"], epsilon)
                    action_e = net_e.act(inputs["e"], epsilon)

                    next_obs_p1, next_obs_p2, next_obs_e, reward_p1, reward_p2, reward_e, done = env.step(action_p1, action_p2, action_e, step_episode)
                    obs_p1, obs_p2, obs_e = torch.Tensor(next_obs_p1), torch.Tensor(next_obs_p2), torch.Tensor(next_obs_e)
                    step_episode += 1

                    # NOTE(review): positions are read after env.step(), while the
                    # activations and Q-values above come from the pre-step
                    # observation -- confirm this one-step offset is intended.
                    step_pos = {"p1": env.pos_p1, "p2": env.pos_p2, "e": env.pos_e}

                    for key in AGENTS:
                        rep_v[key].append(_to_numpy(step_v[key]))
                        rep_a[key].append(_to_numpy(step_a[key]))
                        q_vals[key].append(_to_numpy(step_q[key]))
                        positions[key].append(np.array(step_pos[key]))

            rep_v_list.append([rep_v[key] for key in SAVE_ORDER])
            rep_a_list.append([rep_a[key] for key in SAVE_ORDER])
            q_list.append([q_vals[key] for key in SAVE_ORDER])
            pos_list.append([positions[key] for key in SAVE_ORDER])

        # Save
        file_path = os.path.join(
            save_dir,
            f"results_2on1_{name}_{reward_p}_{speed}_{name_policy}_seed_{seed}.npz",
        )
        np.savez(
            file_path,
            pos=_as_savable_array(pos_list),
            q=_as_savable_array(q_list),
            rep_v=_as_savable_array(rep_v_list),
            rep_a=_as_savable_array(rep_a_list),
        )


cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
