# Setup

In [21]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
from os.path import exists
from os import chdir

In [23]:
if exists("/content/drive/MyDrive/ORT/Posgrado AI/Multiagentes/Obligatorio/ObligatorioPacman"):
  chdir("/content/drive/MyDrive/ORT/Posgrado AI/Multiagentes/Obligatorio/ObligatorioPacman")
else:
  chdir('/content/drive/MyDrive/ORT/Multiagentes/Obligatorio/ObligatorioPacman')

!ls

entregables  ghost_dqn_training.ipynb  layouts	__pycache__  script.py
game_logic   ghost_tester.ipynb        main.py	runs	     weights


## Imports

In [24]:
import torch

from typing import List

from entregables.qlearning import DQN_Model
from entregables.double_dqn_agent import DoubleDQNAgent
from game_logic.ghostAgents import RandomGhost
from entregables.maxNAgent import MaxNAgent
from game_logic.randomPacman import RandomPacman
from game_logic.PacmanEnvAbs import PacmanEnvAbs
import random
import math
import numpy as np

from game_logic import game_util

from main import get_default_agents
import timeit 
import pandas as pd

# Configuration

In [25]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(DEVICE)

cpu


In [26]:
class TestParams():
    def __init__(self, test_name, layout, pacman_agent, agents):        
        self.test_name = test_name
        self.layout = layout
        self.pacman_agent = pacman_agent
        self.len_agents = len(agents)
        for i, agent in enumerate(agents):
            setattr(self, f'ghost_agent_{i}', agent)

In [27]:
all_layouts = [
        "custom1",
        "custom2",
        "capsuleClassic",
        "contestClassic",
        "mediumClassic",
        "minimaxClassic",
        "openClassic",
        "originalClassic",
        "smallClassic",
        "testClassic",
        "trappedClassic",
        "trickyClassic",
        "mediumGrid",
        "smallGrid"
    ]

## Functions

In [28]:
def process_state(state, view_distance, agent_index):
    state_view = game_util.process_state(state, view_distance, agent_index)
    state_pad = np.pad(state_view, ((0, MATRIX_SIZE - state_view.shape[0]), (0, MATRIX_SIZE - state_view.shape[1])),
                        'constant', constant_values=1)
    return state_pad

In [29]:
def run_test(test_params: object, extend_agents: bool = False, amount_extended_agents: int = 3):  
    t0 = timeit.default_timer()   
    pacman_agent = test_params.pacman_agent
    agents = [pacman_agent]
    
    for r in range(0, test_params.len_agents):
        ghost = getattr(test_params, f'ghost_agent_{r}')
        agents.append(ghost)

    if extend_agents:
        agents.extend(get_default_agents(amount_extended_agents, 10)) 

    done = False
    env = PacmanEnvAbs(agents=agents, view_distance=(2, 2))
    game_state = env.reset(enable_render=False, layout_name=test_params.layout)
    
    if len(agents) != game_state.getNumAgents():
        print(f'WARNING: Layout only allow {game_state.getNumAgents() - 1} ghost(s).')
    
    turn_index = 0    
    while (not(done)):
        action = agents[turn_index].getAction(game_state)
        game_state, rewards, done, info = env.step(action, turn_index) 
     
        turn_index = (turn_index + 1) % game_state.getNumAgents()  
    
    t1 = timeit.default_timer()    
    time = t1 - t0  
    
    assert(game_state.getNumAgents() >= 2)
    
    ret = game_state.get_rewards()

    while len(ret) < len(agents):
        ret.append(np.nan)
    
    return ret, time

## DQN Nets

In [30]:
MATRIX_SIZE = 30
ACTION_SPACE_N = 5
AGENT_INDEX = 3
ENV_NAME = 'GhostDQN'

VIEW_DISTANCE = (4, 4)

In [31]:
net_a = DQN_Model(input_size=MATRIX_SIZE * MATRIX_SIZE, n_actions=ACTION_SPACE_N).to(DEVICE)
net_b = DQN_Model(input_size=MATRIX_SIZE * MATRIX_SIZE, n_actions=ACTION_SPACE_N).to(DEVICE)

## Agents

In [32]:
pacman_agent = RandomPacman(index=0)

ghost_agent_0 = MaxNAgent(index=1, unroll_type="MCTS", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
ghost_agent_1 = MaxNAgent(index=2, unroll_type="MC", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
ghost_agent_2 = DoubleDQNAgent(
        None,  # not used for training
        net_a,
        net_b,
        [],  # not used for training
        AGENT_INDEX,   # index: not used for training
        [],  # not used for training
        [],  # not used for training
        process_state,
        0,  # not used for training
        0,  # not used for training
        0,  # not used for training
        0,  # not used for training
        epsilon_i=0,  # not used for training
        epsilon_f=0,  # not used for training
        epsilon_anneal_time=0,  # not used for training
        episode_block=0,  # not used for training
        use_pretrained=True,
        save_between_steps=False,
        view_distance=VIEW_DISTANCE
    )
ghost_agent_3 = RandomGhost(index=4)

agents = [ghost_agent_0, ghost_agent_1, ghost_agent_2, ghost_agent_3]

INFO: Using weights from: ./weights/double_DQNAgent_a.pt & ./weights/double_DQNAgent_b.pt


In [33]:
sample_test = TestParams("PrimerTest", "mediumGrid", pacman_agent, agents)
print(run_test(sample_test))

([-1502, -2349.0, nan, nan, nan], 0.8825722720002886)


# Tests

In [34]:
selected_layouts = [
        "custom1",
        "capsuleClassic",
        "contestClassic",
        "mediumClassic",
        "minimaxClassic",
        "originalClassic",
        "smallClassic",
        "trappedClassic",
        "trickyClassic",
    ]
pacmans = [("rnd_pcmn", RandomPacman(index=0))]

ghosts = [('all', [ghost_agent_0, ghost_agent_1, ghost_agent_2, ghost_agent_3])]

ghost_agent_0 = MaxNAgent(index=1, unroll_type="MCTS", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
ghost_agent_3 = RandomGhost(index=2)
ghosts.append(('mcts_vs_random', [ghost_agent_0, ghost_agent_3]))

ghost_agent_1 = MaxNAgent(index=1, unroll_type="MC", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
ghosts.append(('mc_vs_random', [ghost_agent_1, ghost_agent_3]))

ghost_agent_2 = DoubleDQNAgent(
        None,  # not used for training
        net_a,
        net_b,
        [],  # not used for training
        1,   # index: not used for training
        [],  # not used for training
        [],  # not used for training
        process_state,
        0,  # not used for training
        0,  # not used for training
        0,  # not used for training
        0,  # not used for training
        epsilon_i=0,  # not used for training
        epsilon_f=0,  # not used for training
        epsilon_anneal_time=0,  # not used for training
        episode_block=0,  # not used for training
        use_pretrained=True,
        save_between_steps=False,
        view_distance=VIEW_DISTANCE
    )
ghosts.append(('dqn_vs_random', [ghost_agent_2, ghost_agent_3]))

ghost_agent_0 = MaxNAgent(index=1, unroll_type="MCTS", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
ghost_agent_1 = MaxNAgent(index=2, unroll_type="MC", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
ghosts.append(('mcts_vs_mc', [ghost_agent_0, ghost_agent_1]))

ghost_agent_2 = DoubleDQNAgent(
        None,  # not used for training
        net_a,
        net_b,
        [],  # not used for training
        2,   # index: not used for training
        [],  # not used for training
        [],  # not used for training
        process_state,
        0,  # not used for training
        0,  # not used for training
        0,  # not used for training
        0,  # not used for training
        epsilon_i=0,  # not used for training
        epsilon_f=0,  # not used for training
        epsilon_anneal_time=0,  # not used for training
        episode_block=0,  # not used for training
        use_pretrained=True,
        save_between_steps=False,
        view_distance=VIEW_DISTANCE
    )
ghosts.append(('mcts_vs_dqn', [ghost_agent_0, ghost_agent_2]))

ghost_agent_1 = MaxNAgent(index=1, unroll_type="MC", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
ghosts.append(('mc_vs_dqn', [ghost_agent_1, ghost_agent_2]))

INFO: Using weights from: ./weights/double_DQNAgent_a.pt & ./weights/double_DQNAgent_b.pt
INFO: Using weights from: ./weights/double_DQNAgent_a.pt & ./weights/double_DQNAgent_b.pt


In [35]:
all_tests = []
for layout in selected_layouts:
    for pacman_name, pacman_agent in pacmans:
        for gh_name, ghs in ghosts:
            name = layout + "_" + pacman_name + "_" + gh_name
            test = TestParams(name, layout, pacman_agent, ghs)
            all_tests.append(test)

In [36]:
NUMBER_OF_RUNS = 20

In [37]:
def get_results(tests: List) -> pd.DataFrame:
    results = []
    t0 = timeit.default_timer()   
    for i in range(len(tests)):
            print(f'Running {tests[i].test_name}...')   
            values = []
            times = []    
            for r in range(NUMBER_OF_RUNS):
                print(f'>> Iteration {r+1}/{NUMBER_OF_RUNS}')
                value, time = run_test(tests[i])            
                values.append(value)
                times.append(time)

            # compute avg and std and and add to results
            value_avg = np.round(np.mean(np.array(values), axis=0), 2)
            time_avg = np.round(np.mean(np.array(times)), 2)
            value_std = np.round(np.std(np.array(values), axis=0), 2)
            time_std = np.round(np.std(np.array(times)), 2)
            
            result = {
                'TestName': tests[i].test_name, 
                'Time_Avg': time_avg, 
                'Time_Std': time_std, 
                'P_Value_Avg': value_avg[0], 
                'P_Value_Std': value_std[0]
            }

            for j in range(tests[i].len_agents):
                result[f'GH{j}_Value_Avg'] = value_avg[j+1]
                result[f'GH{j}_Value_Std'] = value_std[j+1]

            results.append(result)

    results = pd.DataFrame(results)
    t1 = timeit.default_timer()  
    time = t1 - t0
    print("All tests time: ", time, "seconds.")
    return results 

In [38]:
results = get_results(all_tests)
display(results)

Running custom1_rnd_pcmn_all...
>> Iteration 1/20
>> Iteration 2/20
>> Iteration 3/20
>> Iteration 4/20
>> Iteration 5/20
>> Iteration 6/20
>> Iteration 7/20
>> Iteration 8/20
>> Iteration 9/20
>> Iteration 10/20
>> Iteration 11/20
>> Iteration 12/20
>> Iteration 13/20
>> Iteration 14/20
>> Iteration 15/20
>> Iteration 16/20
>> Iteration 17/20
>> Iteration 18/20
>> Iteration 19/20
>> Iteration 20/20
Running custom1_rnd_pcmn_mcts_vs_random...
>> Iteration 1/20
>> Iteration 2/20
>> Iteration 3/20
>> Iteration 4/20
>> Iteration 5/20
>> Iteration 6/20
>> Iteration 7/20
>> Iteration 8/20
>> Iteration 9/20
>> Iteration 10/20
>> Iteration 11/20
>> Iteration 12/20
>> Iteration 13/20
>> Iteration 14/20
>> Iteration 15/20
>> Iteration 16/20
>> Iteration 17/20
>> Iteration 18/20
>> Iteration 19/20
>> Iteration 20/20
Running custom1_rnd_pcmn_mc_vs_random...
>> Iteration 1/20
>> Iteration 2/20
>> Iteration 3/20
>> Iteration 4/20
>> Iteration 5/20
>> Iteration 6/20
>> Iteration 7/20
>> Iteration 8/2

Unnamed: 0,TestName,Time_Avg,Time_Std,P_Value_Avg,P_Value_Std,GH0_Value_Avg,GH0_Value_Std,GH1_Value_Avg,GH1_Value_Std,GH2_Value_Avg,GH2_Value_Std,GH3_Value_Avg,GH3_Value_Std
0,custom1_rnd_pcmn_all,14.56,8.89,-1222.05,211.38,-5495.80,3381.74,-5262.95,3498.82,-14609.75,8696.47,-13536.9,8019.7
1,custom1_rnd_pcmn_mcts_vs_random,30.86,18.99,-1261.85,139.66,-31239.90,19473.97,-19213.25,12164.38,,,,
2,custom1_rnd_pcmn_mc_vs_random,3.25,2.60,-1270.40,155.99,-2897.00,2618.78,-3047.65,3056.51,,,,
3,custom1_rnd_pcmn_dqn_vs_random,0.59,0.44,-1169.15,156.43,-10.75,418.67,589.40,441.62,,,,
4,custom1_rnd_pcmn_mcts_vs_mc,49.63,34.99,-1255.70,125.27,-51739.15,37633.68,-34375.50,25373.65,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,trickyClassic_rnd_pcmn_mc_vs_random,1.91,1.61,-1460.75,84.15,-1222.90,1706.77,-1364.25,1727.55,,,,
59,trickyClassic_rnd_pcmn_dqn_vs_random,0.26,0.22,-1472.25,57.23,6.55,297.38,806.60,324.59,,,,
60,trickyClassic_rnd_pcmn_mcts_vs_mc,11.25,14.01,-1449.65,42.89,-10908.85,14477.58,-7932.85,10995.40,,,,
61,trickyClassic_rnd_pcmn_mcts_vs_dqn,7.10,5.69,-1446.85,78.29,-6136.95,5550.33,-4809.65,3960.54,,,,


In [39]:
results[results['TestName'].str.contains('all')][['TestName','Time_Avg','GH0_Value_Avg','GH1_Value_Avg','GH2_Value_Avg','GH3_Value_Avg']]

Unnamed: 0,TestName,Time_Avg,GH0_Value_Avg,GH1_Value_Avg,GH2_Value_Avg,GH3_Value_Avg
0,custom1_rnd_pcmn_all,14.56,-5495.8,-5262.95,-14609.75,-13536.9
7,capsuleClassic_rnd_pcmn_all,1.31,-495.15,-683.6,-1732.4,
14,contestClassic_rnd_pcmn_all,2.28,-1186.5,-1401.9,-2384.8,
21,mediumClassic_rnd_pcmn_all,5.91,-5766.1,-4475.6,,
28,minimaxClassic_rnd_pcmn_all,0.09,505.45,209.85,-112.75,
35,originalClassic_rnd_pcmn_all,24.18,-4456.55,-4661.6,-10562.0,-8513.0
42,smallClassic_rnd_pcmn_all,3.21,-3951.55,-2956.05,,
43,smallClassic_rnd_pcmn_mcts_vs_random,1.94,-2129.25,-1391.65,,
44,smallClassic_rnd_pcmn_mc_vs_random,0.19,253.0,235.1,,
45,smallClassic_rnd_pcmn_dqn_vs_random,0.1,207.8,707.95,,


In [40]:
results[results['TestName'].str.contains('random')][['TestName','Time_Avg','GH0_Value_Avg','GH1_Value_Avg']]

Unnamed: 0,TestName,Time_Avg,GH0_Value_Avg,GH1_Value_Avg
1,custom1_rnd_pcmn_mcts_vs_random,30.86,-31239.9,-19213.25
2,custom1_rnd_pcmn_mc_vs_random,3.25,-2897.0,-3047.65
3,custom1_rnd_pcmn_dqn_vs_random,0.59,-10.75,589.4
8,capsuleClassic_rnd_pcmn_mcts_vs_random,1.05,-932.55,-507.6
9,capsuleClassic_rnd_pcmn_mc_vs_random,0.21,108.25,298.9
10,capsuleClassic_rnd_pcmn_dqn_vs_random,0.1,154.85,754.95
15,contestClassic_rnd_pcmn_mcts_vs_random,2.23,-1829.95,-1187.6
16,contestClassic_rnd_pcmn_mc_vs_random,0.25,309.5,92.65
17,contestClassic_rnd_pcmn_dqn_vs_random,0.23,154.55,654.75
22,mediumClassic_rnd_pcmn_mcts_vs_random,3.64,-3350.9,-2382.35
