# Setup

In [1]:
!pip install torchinfo==1.7.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install gym

# Imports

In [2]:
from typing import Tuple
import os
import torch
import gym
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

import math
import random
import numpy as np

import torch.nn as nn
import torch.nn.functional as F
import torchvision

from os.path import exists
from os import chdir

In [3]:
if exists("/content/drive/MyDrive/ORT/Posgrado AI/Multiagentes/Obligatorio/ObligatorioPacman"):
  chdir("/content/drive/MyDrive/ORT/Posgrado AI/Multiagentes/Obligatorio/ObligatorioPacman")
else:
  chdir('/content/drive/MyDrive/ORT/Multiagentes/Obligatorio/ObligatorioPacman')

!ls

entregables  game_logic  ghost_training.ipynb  layouts	main.py


In [6]:
from entregables.double_dqn_agent import DoubleDQNAgent
from entregables.qlearning import DQN_Model

In [13]:
from entregables.maxNAgent import MaxNAgent
from game_logic.game_util import process_state
from game_logic.ghostAgents import RandomGhost
from game_logic.PacmanEnvAbs import PacmanEnvAbs
from game_logic.randomPacman import RandomPacman

In [7]:
import warnings
# Install a blanket "ignore" filter: no category, message or module is given,
# so every warning raised from here on is suppressed.
# NOTE(review): this also hides deprecation warnings from the libraries in use.
warnings.filterwarnings("ignore")

Set the random seed of every library in use (`random`, NumPy, PyTorch) so that runs are reproducible.

In [8]:
# One seed shared by every random number generator used in this notebook.
SEED = 142

# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Seed each library's generator with the same value (the generators are
# independent of one another, so the order of these calls does not matter).
torch.cuda.manual_seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

Select the compute device: the first CUDA GPU if one is available, otherwise the CPU.

In [9]:
# Use the first CUDA device when CUDA is available, otherwise run on the CPU.
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# Bare expression so the notebook displays the selected device.
DEVICE

device(type='cuda', index=0)

# Training

## Params

In [10]:
# Training hyperparameters for the DQN agent.

# --- Training length (passed to agent.train) ---
TOTAL_STEPS = 5_000_000
EPISODES = 10_000
STEPS = 100_000

# --- Epsilon schedule (passed to the agent as epsilon_i / epsilon_f / epsilon_anneal_time) ---
EPSILON_INI = 1
EPSILON_MIN = 0.02
# Anneal time is the epsilon range scaled by the total number of steps.
EPSILON_TIME = TOTAL_STEPS * (EPSILON_INI - EPSILON_MIN)

# --- Bookkeeping ---
EPISODE_BLOCK = 10
USE_PRETRAINED = False
SAVE_BETWEEN_STEPS = 100_000

# --- Replay buffer and optimisation ---
BATCH_SIZE = 32
BUFFER_SIZE = 10_000
GAMMA = 0.99
LEARNING_RATE = 1e-4

# --- Environment / network dimensions ---
MATRIX_SIZE = 30      # states are padded to MATRIX_SIZE x MATRIX_SIZE
ACTION_SPACE_N = 5    # number of network outputs (n_actions)
AGENT_INDEX = 1       # index of the agent being trained
ENV_NAME = 'GhostDQN'

## Functions

In [15]:
def get_default_agents(starting_index, num_ghosts=10):
    """
    Build a list of RandomGhost agents with consecutive indices.

    :param starting_index: index given to the first ghost.
    :param num_ghosts: how many ghosts to create (an empty list is returned
        when this is zero or negative).
    :return: list of RandomGhost instances with indices
        starting_index .. starting_index + num_ghosts - 1.
    """
    last_index = starting_index + num_ghosts
    return [RandomGhost(index=ghost_index) for ghost_index in range(starting_index, last_index)]

In [None]:
def process_state(state: object, view_distance: Tuple[int, int], agent_index: int) -> np.ndarray:
    """
    Process the game state for training the DQN agent.

    Delegates to ``game_logic.game_util.process_state`` and pads the resulting
    2-D view on its bottom and right edges up to MATRIX_SIZE x MATRIX_SIZE so
    that it matches the network's input size.

    :param state: game state.
    :param view_distance: view of the agent.
    :param agent_index: index of the agent the state is processed for.
    :return: the padded state, of shape (MATRIX_SIZE, MATRIX_SIZE).
    :raises ValueError: if the processed view is larger than MATRIX_SIZE in
        either dimension.
    """
    # Import the module locally: this function shadows the `process_state`
    # name imported from game_util earlier in the notebook, and the
    # `game_util` module itself is not imported anywhere else, so the call
    # below would otherwise fail with a NameError.
    from game_logic import game_util

    state_view = game_util.process_state(state, view_distance, agent_index)

    n_rows, n_cols = state_view.shape[0], state_view.shape[1]
    if n_rows > MATRIX_SIZE or n_cols > MATRIX_SIZE:
        # np.pad would reject the negative pad width anyway; fail with a
        # message that names the actual problem.
        raise ValueError(
            f"state view of shape {state_view.shape} does not fit in a "
            f"{MATRIX_SIZE}x{MATRIX_SIZE} matrix"
        )

    # we add padding to match net's input, adding 9's as a new value for "unexplored"
    pad_widths = ((0, MATRIX_SIZE - n_rows), (0, MATRIX_SIZE - n_cols))
    return np.pad(state_view, pad_widths, 'constant', constant_values=9)

## Layouts and view distances

In [17]:
# Names of every layout handed to the agent, in the order they are passed on.
all_layouts = [
    # custom layouts
    "custom1", "custom2",
    # "Classic" layouts
    "capsuleClassic", "contestClassic", "mediumClassic", "minimaxClassic",
    "openClassic", "originalClassic", "smallClassic", "testClassic",
    "trappedClassic", "trickyClassic",
    # grid layouts
    "mediumGrid", "smallGrid",
]

In [None]:
# Square view distances, from smallest to largest; each entry is a (d, d) tuple.
view_distances = [(distance, distance) for distance in (2, 4, 6, 8, 10, 15, 20, 30)]

## Game env & Agents

In [19]:
# Agent 0 is a random Pacman.
pacman_agent = RandomPacman(index=0)

# The ghost at AGENT_INDEX is a MaxN agent; the ghost at index 2 moves randomly.
ghost_agent_0 = MaxNAgent(
    index=AGENT_INDEX,
    unroll_type="MCTS",
    max_unroll_depth=12,
    number_of_unrolls=6,
)
ghost_agent_1 = RandomGhost(index=2)

# Full roster: the three agents above followed by ten more random ghosts
# (indices 3 to 12).
agents = [pacman_agent, ghost_agent_0, ghost_agent_1, *get_default_agents(3, 10)]

In [None]:
# Build the environment with the full agent roster and the last (largest)
# entry of view_distances.
env = PacmanEnvAbs(
    agents=agents,
    view_distance=view_distances[-1],
)

## Nets

In [None]:
# Two identically configured networks (net_a is built first, then net_b), each
# taking a flattened MATRIX_SIZE x MATRIX_SIZE input and moved to DEVICE.
net_a, net_b = (
    DQN_Model(input_size=MATRIX_SIZE ** 2, n_actions=ACTION_SPACE_N).to(DEVICE)
    for _ in range(2)
)

## DQN

In [None]:
# Build the Double DQN agent. The first twelve arguments are positional, so
# their order must match DoubleDQNAgent's signature (not visible in this
# notebook — verify against entregables/double_dqn_agent.py before reordering).
agent = DoubleDQNAgent(
    env,             # PacmanEnvAbs instance created above
    net_a,           # first DQN_Model
    net_b,           # second DQN_Model
    agents,          # full agent roster
    AGENT_INDEX,     # index of the agent being trained
    all_layouts,     # layout names
    view_distances,  # list of (d, d) view distances
    process_state,   # state pre-processing callable
    BUFFER_SIZE,
    BATCH_SIZE,
    LEARNING_RATE,
    GAMMA,
    epsilon_i=EPSILON_INI,
    epsilon_f=EPSILON_MIN,
    epsilon_anneal_time=EPSILON_TIME,
    episode_block=EPISODE_BLOCK,
    use_pretrained=USE_PRETRAINED,
    save_between_steps=SAVE_BETWEEN_STEPS
)

In [None]:
# Run training and keep whatever agent.train returns in `rewards`.
rewards = agent.train(
    EPISODES,
    STEPS,
    TOTAL_STEPS,
    writer_name=ENV_NAME + '_double',
)

In [None]:
# Report the value returned by agent.train.
print(f'Accumulated rewards during training: {rewards}')

# Training metrics

In [None]:
%tensorboard  --logdir=runs