# Setup

In [None]:
# Mount Google Drive under /content/drive; the project directory entered by the
# later chdir cell lives inside it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from os.path import exists
from os import chdir

In [None]:
# Enter the project folder on Drive. Two locations are known; the second one is
# used whenever the first does not exist.
_primary_dir = "/content/drive/MyDrive/ORT/Posgrado AI/Multiagentes/Obligatorio/ObligatorioPacman"
_fallback_dir = '/content/drive/MyDrive/ORT/Multiagentes/Obligatorio/ObligatorioPacman'
chdir(_primary_dir if exists(_primary_dir) else _fallback_dir)

!ls

entregables  ghost_dqn_training.ipynb  layouts	__pycache__  script.py
game_logic   ghost_tester.ipynb        main.py	runs	     weights


## Imports

In [None]:
import torch

from typing import List

from entregables.qlearning import DQN_Model
from entregables.double_dqn_agent import DoubleDQNAgent
from game_logic.ghostAgents import RandomGhost
from entregables.maxNAgent import MaxNAgent
from game_logic.randomPacman import RandomPacman
from game_logic.PacmanEnvAbs import PacmanEnvAbs
import random
import math
import numpy as np

from game_logic import game_util

from main import get_default_agents
import timeit 
import pandas as pd

# Configuration

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(DEVICE)

cpu


In [None]:
class TestParams:
    """Description of one benchmark game.

    Attributes set by the constructor:
        test_name, layout, pacman_agent: stored exactly as given.
        len_agents: number of entries in ``agents``.
        ghost_agent_<k>: the k-th entry of ``agents`` (k starts at 0).
    """

    def __init__(self, test_name, layout, pacman_agent, agents):
        self.test_name, self.layout, self.pacman_agent = test_name, layout, pacman_agent
        self.len_agents = len(agents)
        # One dynamically named attribute per ghost; consumers read them back
        # with getattr(params, 'ghost_agent_<k>').
        for position, ghost in enumerate(agents):
            setattr(self, 'ghost_agent_' + str(position), ghost)

In [None]:
# Names of every layout known to this notebook; values are passed as
# `layout_name` when a game is reset.
all_layouts = [
        "custom1",
        "custom2",
        "capsuleClassic",
        "contestClassic",
        "mediumClassic",
        "minimaxClassic",
        "openClassic",
        "originalClassic",
        "smallClassic",
        "testClassic",
        "trappedClassic",
        "trickyClassic",
        "mediumGrid",
        "smallGrid"
    ]

## Functions

In [None]:
def process_state(state, view_distance, agent_index):
    """Return the agent's view of ``state`` padded to MATRIX_SIZE x MATRIX_SIZE.

    The view is produced by ``game_util.process_state`` and treated as a 2-D
    array. Padding is added only after the existing rows/columns (bottom and
    right) and is filled with the constant 1.
    """
    view = game_util.process_state(state, view_distance, agent_index)
    rows, cols = view.shape[0], view.shape[1]
    pad_widths = ((0, MATRIX_SIZE - rows), (0, MATRIX_SIZE - cols))
    return np.pad(view, pad_widths, mode='constant', constant_values=1)

In [None]:
def run_test(test_params: "TestParams", extend_agents: bool = False, amount_extended_agents: int = 3):
    """Play one complete game described by ``test_params`` and time it.

    Args:
        test_params: object exposing ``pacman_agent``, ``layout``, ``len_agents``
            and one ``ghost_agent_<k>`` attribute per ghost (k = 0..len_agents-1).
        extend_agents: when True, the agents returned by
            ``get_default_agents(amount_extended_agents, 10)`` are appended to
            the roster after the configured ghosts.
        amount_extended_agents: first argument forwarded to ``get_default_agents``.

    Returns:
        ``(rewards, elapsed)``. ``rewards`` is a new list holding what
        ``game_state.get_rewards()`` reports for the final state, padded with
        ``np.nan`` so it has at least one entry per agent in the roster.
        ``elapsed`` is the duration in seconds measured with
        ``timeit.default_timer`` (environment creation and reset included).

    Raises:
        AssertionError: if the freshly reset game reports fewer than two agents.
    """
    started_at = timeit.default_timer()

    # Roster order is Pacman first, then the ghosts in their configured order.
    agents = [test_params.pacman_agent]
    agents.extend(
        getattr(test_params, f'ghost_agent_{k}') for k in range(test_params.len_agents)
    )

    if extend_agents:
        agents.extend(get_default_agents(amount_extended_agents, 10))

    env = PacmanEnvAbs(agents=agents, view_distance=(2, 2))
    game_state = env.reset(enable_render=False, layout_name=test_params.layout)

    # Fail fast: checking this only after the game has been played wastes the run.
    assert game_state.getNumAgents() >= 2

    if len(agents) != game_state.getNumAgents():
        print(f'WARNING: Layout only allow {game_state.getNumAgents() - 1} ghost(s).')

    done = False
    turn_index = 0
    while not done:
        action = agents[turn_index].getAction(game_state)
        game_state, _, done, _ = env.step(action, turn_index)
        # Turns rotate over the agents the game actually has, not over the roster.
        turn_index = (turn_index + 1) % game_state.getNumAgents()

    elapsed = timeit.default_timer() - started_at

    # Copy before padding so the list owned by the game state is never mutated.
    rewards = list(game_state.get_rewards())
    rewards.extend([np.nan] * (len(agents) - len(rewards)))

    return rewards, elapsed

## DQN Nets

In [None]:
MATRIX_SIZE = 30  # side of the square process_state pads each view to; the nets take MATRIX_SIZE * MATRIX_SIZE inputs
ACTION_SPACE_N = 5  # n_actions passed to DQN_Model
AGENT_INDEX = 3  # index given to the DQN ghost in the four-ghost roster
ENV_NAME = 'GhostDQN'  # not referenced anywhere in the visible part of this notebook

VIEW_DISTANCE = (4, 4)  # view_distance passed to the MaxN and DQN ghosts (run_test builds its env with (2, 2))

In [None]:
# Two identically configured networks handed to DoubleDQNAgent as its net pair.
# NOTE(review): the agent is created with use_pretrained=True and logs that it
# uses ./weights/double_DQNAgent_a.pt and _b.pt — presumably it loads them into
# these nets; confirm in DoubleDQNAgent.
net_a = DQN_Model(input_size=MATRIX_SIZE * MATRIX_SIZE, n_actions=ACTION_SPACE_N).to(DEVICE)
net_b = DQN_Model(input_size=MATRIX_SIZE * MATRIX_SIZE, n_actions=ACTION_SPACE_N).to(DEVICE)

## Agents

In [None]:
# Pacman always plays at index 0.
pacman_agent = RandomPacman(index=0)

# Four-ghost roster: MaxN with MCTS unrolls (index 1), MaxN with MC unrolls
# (index 2), the pretrained Double DQN ghost (AGENT_INDEX) and a random ghost
# (index 4).
ghost_agent_0 = MaxNAgent(index=1, unroll_type="MCTS", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
ghost_agent_1 = MaxNAgent(index=2, unroll_type="MC", max_unroll_depth=12, number_of_unrolls=6, view_distance=VIEW_DISTANCE)
# NOTE(review): the original inline notes read "not used for training". Given the
# dummy values (None, [], 0) and use_pretrained=True, they presumably mean the
# argument only matters while training and is a placeholder here — confirm
# against DoubleDQNAgent's signature.
ghost_agent_2 = DoubleDQNAgent(
        None,  # placeholder (see note above)
        net_a,
        net_b,
        [],  # placeholder (see note above)
        AGENT_INDEX,   # index of this ghost among the agents
        [],  # placeholder (see note above)
        [],  # placeholder (see note above)
        process_state,
        0,  # placeholder (see note above)
        0,  # placeholder (see note above)
        0,  # placeholder (see note above)
        0,  # placeholder (see note above)
        epsilon_i=0,  # placeholder (see note above)
        epsilon_f=0,  # placeholder (see note above)
        epsilon_anneal_time=0,  # placeholder (see note above)
        episode_block=0,  # placeholder (see note above)
        use_pretrained=True,
        save_between_steps=False,
        view_distance=VIEW_DISTANCE
    )
ghost_agent_3 = RandomGhost(index=4)

# Ghosts in index order; TestParams receives this list.
agents = [ghost_agent_0, ghost_agent_1, ghost_agent_2, ghost_agent_3]

INFO: Using weights from: ./weights/double_DQNAgent_a.pt & ./weights/double_DQNAgent_b.pt


In [None]:
# Smoke test: play a single game on mediumGrid with the full roster and print
# the (rewards, seconds) tuple returned by run_test.
sample_test = TestParams("PrimerTest", "mediumGrid", pacman_agent, agents)
print(run_test(sample_test))

([-1499, -1359.0, nan, nan, nan], 1.4464981039999998)


# Tests

In [None]:
# Layouts exercised by the benchmark (a subset of all_layouts).
selected_layouts = [
    "custom1",
    "capsuleClassic",
    "contestClassic",
    "mediumClassic",
    "minimaxClassic",
    "originalClassic",
    "smallClassic",
    "trappedClassic",
    "trickyClassic",
]

# (label, agent) pairs; the label becomes part of each test name.
pacmans = [("rnd_pcmn", RandomPacman(index=0))]


def _make_maxn_ghost(index, unroll_type):
    """Return a MaxNAgent ghost at ``index`` using the unroll settings shared by every matchup."""
    return MaxNAgent(
        index=index,
        unroll_type=unroll_type,
        max_unroll_depth=12,
        number_of_unrolls=6,
        view_distance=VIEW_DISTANCE,
    )


def _make_dqn_ghost(index):
    """Return a DoubleDQNAgent ghost at ``index`` backed by net_a / net_b.

    Every argument other than the nets, the index, process_state and the
    keyword flags is a dummy value (None, [], 0).
    NOTE(review): those dummies are presumably training-only arguments that are
    irrelevant when the agent just plays — confirm against DoubleDQNAgent.
    """
    return DoubleDQNAgent(
        None,
        net_a,
        net_b,
        [],
        index,
        [],
        [],
        process_state,
        0,
        0,
        0,
        0,
        epsilon_i=0,
        epsilon_f=0,
        epsilon_anneal_time=0,
        episode_block=0,
        use_pretrained=True,
        save_between_steps=False,
        view_distance=VIEW_DISTANCE,
    )


# (label, ghost team) pairs. 'all' reuses the four-ghost roster stored in
# `agents`. It deliberately does not read ghost_agent_0..3, and this cell no
# longer rebinds those names: previously a second run of the cell built 'all'
# from the two-ghost agents (indices 1, 1, 2, 2) left over from the first run.
ghosts = [('all', list(agents))]

# Two-ghost matchups: the first ghost plays at index 1, the second at index 2.
mcts_vs_random_first = _make_maxn_ghost(1, "MCTS")
random_second = RandomGhost(index=2)  # shared by the three *_vs_random teams
ghosts.append(('mcts_vs_random', [mcts_vs_random_first, random_second]))

ghosts.append(('mc_vs_random', [_make_maxn_ghost(1, "MC"), random_second]))

ghosts.append(('dqn_vs_random', [_make_dqn_ghost(1), random_second]))

mcts_first = _make_maxn_ghost(1, "MCTS")  # shared by mcts_vs_mc and mcts_vs_dqn
ghosts.append(('mcts_vs_mc', [mcts_first, _make_maxn_ghost(2, "MC")]))

dqn_second = _make_dqn_ghost(2)  # shared by mcts_vs_dqn and mc_vs_dqn
ghosts.append(('mcts_vs_dqn', [mcts_first, dqn_second]))

ghosts.append(('mc_vs_dqn', [_make_maxn_ghost(1, "MC"), dqn_second]))

INFO: Using weights from: ./weights/double_DQNAgent_a.pt & ./weights/double_DQNAgent_b.pt
INFO: Using weights from: ./weights/double_DQNAgent_a.pt & ./weights/double_DQNAgent_b.pt


In [None]:
# One TestParams per (layout, pacman, ghost team) combination, named
# "<layout>_<pacman label>_<team label>". A comprehension keeps the loop
# variables local: the previous module-level loop rebound the global
# `pacman_agent` and left `layout`, `name`, `test`, ... behind in the namespace.
all_tests = [
    TestParams(f"{layout}_{pacman_name}_{gh_name}", layout, pacman, ghost_team)
    for layout in selected_layouts
    for pacman_name, pacman in pacmans
    for gh_name, ghost_team in ghosts
]

In [None]:
NUMBER_OF_RUNS = 70  # games played per test by get_results before averaging

In [None]:
def get_results(tests: List) -> pd.DataFrame:
    """Run every test NUMBER_OF_RUNS times and summarise the outcomes.

    Args:
        tests: objects exposing ``test_name`` and ``len_agents`` that
            ``run_test`` accepts.

    Returns:
        A DataFrame with one row per test: ``TestName``, mean/std of the run
        time (``Time_Avg`` / ``Time_Std``), mean/std of Pacman's value
        (``P_Value_*``) and of each ghost's value (``GH<k>_Value_*``), all
        rounded to two decimals. Standard deviations use numpy's default
        (population) definition.

    Progress lines and the total elapsed time are printed to stdout.
    """
    rows = []
    started_at = timeit.default_timer()

    for test in tests:
        print(f'Running {test.test_name}...')

        outcomes = []
        for run_number in range(1, NUMBER_OF_RUNS + 1):
            print(f'>> Iteration {run_number}/{NUMBER_OF_RUNS}')
            outcomes.append(run_test(test))

        # Column 0 of the value matrix is Pacman, column k + 1 is ghost k.
        value_matrix = np.array([value for value, _ in outcomes])
        durations = np.array([seconds for _, seconds in outcomes])

        value_avg = np.round(value_matrix.mean(axis=0), 2)
        value_std = np.round(value_matrix.std(axis=0), 2)

        row = {
            'TestName': test.test_name,
            'Time_Avg': np.round(durations.mean(), 2),
            'Time_Std': np.round(durations.std(), 2),
            'P_Value_Avg': value_avg[0],
            'P_Value_Std': value_std[0],
        }
        for ghost_number in range(test.len_agents):
            column = ghost_number + 1
            row[f'GH{ghost_number}_Value_Avg'] = value_avg[column]
            row[f'GH{ghost_number}_Value_Std'] = value_std[column]

        rows.append(row)

    summary = pd.DataFrame(rows)
    total_seconds = timeit.default_timer() - started_at
    print("All tests time: ", total_seconds, "seconds.")
    return summary

In [None]:
# Run every configured test (NUMBER_OF_RUNS games each) and show the summary table.
results = get_results(all_tests)
display(results)

Running custom1_rnd_pcmn_all...
>> Iteration 1/50
>> Iteration 2/50
>> Iteration 3/50
>> Iteration 4/50
>> Iteration 5/50
>> Iteration 6/50
>> Iteration 7/50
>> Iteration 8/50
>> Iteration 9/50
>> Iteration 10/50
>> Iteration 11/50
>> Iteration 12/50
>> Iteration 13/50
>> Iteration 14/50
>> Iteration 15/50
>> Iteration 16/50
>> Iteration 17/50
>> Iteration 18/50
>> Iteration 19/50
>> Iteration 20/50
>> Iteration 21/50
>> Iteration 22/50
>> Iteration 23/50
>> Iteration 24/50
>> Iteration 25/50
>> Iteration 26/50
>> Iteration 27/50
>> Iteration 28/50
>> Iteration 29/50
>> Iteration 30/50
>> Iteration 31/50
>> Iteration 32/50
>> Iteration 33/50
>> Iteration 34/50
>> Iteration 35/50
>> Iteration 36/50
>> Iteration 37/50
>> Iteration 38/50
>> Iteration 39/50
>> Iteration 40/50
>> Iteration 41/50
>> Iteration 42/50
>> Iteration 43/50
>> Iteration 44/50
>> Iteration 45/50
>> Iteration 46/50
>> Iteration 47/50
>> Iteration 48/50
>> Iteration 49/50
>> Iteration 50/50
Running custom1_rnd_pcmn_mc

Unnamed: 0,TestName,Time_Avg,Time_Std,P_Value_Avg,P_Value_Std,GH0_Value_Avg,GH0_Value_Std,GH1_Value_Avg,GH1_Value_Std,GH2_Value_Avg,GH2_Value_Std,GH3_Value_Avg,GH3_Value_Std
0,custom1_rnd_pcmn_all,24.87,22.51,-1291.50,168.61,-9178.76,8692.54,-9347.84,8880.95,-25200.02,22891.82,-23235.4,21426.36
1,custom1_rnd_pcmn_mcts_vs_random,33.97,27.74,-1244.56,210.66,-34468.18,28384.73,-21314.38,17927.78,,,,
2,custom1_rnd_pcmn_mc_vs_random,3.97,3.31,-1265.34,163.80,-3470.88,3436.45,-4017.48,3874.01,,,,
3,custom1_rnd_pcmn_dqn_vs_random,0.60,0.52,-1193.74,251.07,-47.06,422.95,593.04,447.00,,,,
4,custom1_rnd_pcmn_mcts_vs_mc,35.46,25.49,-1282.60,142.57,-36813.36,26958.10,-24218.76,18134.17,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,trickyClassic_rnd_pcmn_mc_vs_random,1.53,1.65,-1450.44,69.63,-894.48,1629.52,-1092.58,1725.51,,,,
59,trickyClassic_rnd_pcmn_dqn_vs_random,0.48,0.53,-1465.72,106.28,-3.44,382.36,716.60,354.91,,,,
60,trickyClassic_rnd_pcmn_mcts_vs_mc,10.45,7.71,-1453.44,46.74,-9771.10,7570.09,-7162.04,5654.32,,,,
61,trickyClassic_rnd_pcmn_mcts_vs_dqn,10.54,6.87,-1429.10,67.88,-9184.38,6497.78,-6916.24,4654.07,,,,


In [None]:
# Keep only the four-ghost ('all') matchups. Test names are built as
# "<layout>_<pacman>_<matchup>", so match the "_all" suffix: a substring search
# for 'all' also selects every "smallClassic_*" test.
results[results['TestName'].str.endswith('_all')][['TestName','Time_Avg','GH0_Value_Avg','GH1_Value_Avg','GH2_Value_Avg','GH3_Value_Avg']]

Unnamed: 0,TestName,Time_Avg,GH0_Value_Avg,GH1_Value_Avg,GH2_Value_Avg,GH3_Value_Avg
0,custom1_rnd_pcmn_all,24.87,-9178.76,-9347.84,-25200.02,-23235.4
7,capsuleClassic_rnd_pcmn_all,1.76,-772.36,-921.88,-2300.86,
14,contestClassic_rnd_pcmn_all,2.38,-1115.62,-1340.56,-2635.22,
21,mediumClassic_rnd_pcmn_all,5.55,-5244.84,-4089.96,,
28,minimaxClassic_rnd_pcmn_all,0.11,418.58,240.78,-106.92,
35,originalClassic_rnd_pcmn_all,20.74,-3961.34,-3846.04,-9098.94,-7228.48
42,smallClassic_rnd_pcmn_all,2.77,-3382.14,-2229.16,,
43,smallClassic_rnd_pcmn_mcts_vs_random,1.98,-1981.98,-1444.16,,
44,smallClassic_rnd_pcmn_mc_vs_random,0.44,-54.84,-131.86,,
45,smallClassic_rnd_pcmn_dqn_vs_random,0.09,161.34,761.48,,


In [None]:
# Matchups that include the random ghost: rows whose name contains 'random',
# restricted to the timing and the two ghosts' average values.
results.loc[
    results['TestName'].str.contains('random'),
    ['TestName', 'Time_Avg', 'GH0_Value_Avg', 'GH1_Value_Avg'],
]

Unnamed: 0,TestName,Time_Avg,GH0_Value_Avg,GH1_Value_Avg
1,custom1_rnd_pcmn_mcts_vs_random,33.97,-34468.18,-21314.38
2,custom1_rnd_pcmn_mc_vs_random,3.97,-3470.88,-4017.48
3,custom1_rnd_pcmn_dqn_vs_random,0.6,-47.06,593.04
8,capsuleClassic_rnd_pcmn_mcts_vs_random,1.3,-1178.22,-818.02
9,capsuleClassic_rnd_pcmn_mc_vs_random,0.21,285.02,157.38
10,capsuleClassic_rnd_pcmn_dqn_vs_random,0.12,245.68,645.88
15,contestClassic_rnd_pcmn_mcts_vs_random,4.01,-3967.4,-2983.42
16,contestClassic_rnd_pcmn_mc_vs_random,0.46,-25.44,-52.62
17,contestClassic_rnd_pcmn_dqn_vs_random,0.18,265.78,585.98
22,mediumClassic_rnd_pcmn_mcts_vs_random,4.75,-4446.86,-3251.1
