In [2]:
from pacman import *
import numpy as np
import random
import math
import util
from game import Directions
from game import Agent

In [None]:
class Cell:
    coordinates = None
    adjacency = np.array([])
    visited = False
    parent = None

class Node:
    nature = None
    subnature = None
    value = None
    parent = None
    children = np.array([])

def get_list_and_dict_of_cells(layout):
    dict_of_cells = dict([])
    list_of_cells = np.array([])

    for x in range(layout.width):
        for y in range(layout.height):
            if not layout.walls[x][y]:#Se não é parede
                new_cell = Cell()
                new_cell.coordinates = (x,y)
                dict_of_cells[str(x) + ',' + str(y)] = new_cell
                list_of_cells = np.append(list_of_cells, new_cell)

    for cell in list_of_cells:
        x = cell.coordinates[0]
        y = cell.coordinates[1]

        if x - 1 >= 0 and not layout.walls[x-1][y]:
            cell.adjacency = np.append(cell.adjacency, dict_of_cells[str(x-1) + ',' + str(y)])

        if y - 1 >= 0 and not layout.walls[x][y-1]:
            cell.adjacency = np.append(cell.adjacency, dict_of_cells[str(x) + ',' + str(y-1)])

        if x + 1 <= layout.width and not layout.walls[x+1][y]:
            cell.adjacency = np.append(cell.adjacency, dict_of_cells[str(x+1) + ',' + str(y)])

        if y + 1 <= layout.height and not layout.walls[x][y+1]:
            cell.adjacency = np.append(cell.adjacency, dict_of_cells[str(x) + ',' + str(y+1)])
    
    return list_of_cells, dict_of_cells
    
#Partial BFS until find final cell
def find_shortest_path(initial_cell_coordinates, final_cell_coordinates, list_of_cells, dict_of_cells):
    
    initial_cell = dict_of_cells[str(initial_cell_coordinates[0]) + ',' + str(initial_cell_coordinates[1])]
    final_cell = dict_of_cells[str(final_cell_coordinates[0]) + ',' + str(final_cell_coordinates[1])]
    
    for cell in list_of_cells:
        cell.visited = False
        cell.parent = None
    
    queue = []
    queue.append(initial_cell)
    
    while not final_cell.visited:
        current_cell = queue.pop(0)
        current_cell.visited = True
        for adjacent_cell in current_cell.adjacency:
            if not adjacent_cell.visited:
                queue.append(adjacent_cell)
                adjacent_cell.parent = current_cell
                
    path = []
    current_cell = final_cell
    while current_cell != initial_cell:
        path.append(current_cell)
        current_cell = current_cell.parent
    path.append(current_cell)
    
    path.reverse()
    
    for i in range(len(path)-1):
        direction = None
        if path[i].coordinates[0] > path[i+1].coordinates[0]:
            direction = Directions.WEST
        elif path[i].coordinates[0] < path[i+1].coordinates[0]:
            direction = Directions.EAST
        elif path[i].coordinates[1] > path[i+1].coordinates[1]:
            direction = Directions.SOUTH
        else:
            direction = Directions.NORTH
        key = str(path[i].coordinates[0]) + ',' + str(path[i].coordinates[1]) + '-' + str(final_cell.coordinates[0]) + ',' + str(final_cell.coordinates[1])
        greedy_moves[key] = direction
    
class MyAgent(Agent):

    chromosome = None
    
    def find_closest_food(self, state):
        food_list = state.getFood().asList()
        closest_food_coordinates = None
        closest_food_dist = math.inf
        for i in range(len(food_list)):
            current_dist = manhattanDistance(state.getPacmanPosition(), food_list[i])
            if current_dist < closest_food_dist:
                closest_food_dist = current_dist
                closest_food_coordinates = food_list[i]
        return closest_food_coordinates, closest_food_dist
    
    def find_closest_active_ghost(self, state):
        ghost_list = state.getGhostPositions()
        closest_ghost_coordinates = None
        closest_ghost_dist = math.inf
        for i in range(len(ghost_list)):
            current_dist = manhattanDistance(state.getPacmanPosition(), ghost_list[i])
            if current_dist < closest_ghost_dist:
                closest_ghost_dist = current_dist
                closest_ghost_coordinates = ghost_list[i]
        return closest_ghost_coordinates, closest_ghost_dist     
            
    def parse_chromosome(self, state): 
        current_node = self.chromosome #Initially it is the root of the decision tree
        while current_node.nature != 'Action':
            if current_node.nature == 'Ifle':
                children = current_node.children
                for child in children:
                    if child.subnature == 'DistToFood':
                        child.value = self.find_closest_food(state)[1]
                    if child.subnature == 'DistToActiveGhost':
                        child.value = self.find_closest_active_ghost(state)[1]
                if children[0].value <= children[1].value:
                    current_node = children[2]
                else:
                    current_node = children[3]
                    
        # Deal with action
        if current_node.subnature == 'GetFood':
            closest_food_coordinates = self.find_closest_food(state)[0]
            current_position = list(state.getPacmanPosition())
            key = str(current_position[0]) + ',' + str(current_position[1]) + '-' + str(closest_food_coordinates[0]) + ',' + str(closest_food_coordinates[1])
            if key not in greedy_moves:
                find_shortest_path(current_position, closest_food_coordinates, list_of_cells, dict_of_cells)
            action = greedy_moves[key]
            return action
        elif current_node.subnature == 'EscapeFromGhost':
            current_position = list(state.getPacmanPosition())
            closest_ghost_coordinates = self.find_closest_active_ghost(state)[0]
            max_dist = 0
            max_dist_move = None
            for action in state.getLegalPacmanActions():

                if action == Directions.WEST:
                    next_position = [current_position[0] - 1, current_position[1]]
                elif action == Directions.EAST:
                    next_position = [current_position[0] + 1, current_position[1]]
                elif action == Directions.NORTH:
                    next_position = [current_position[0], current_position[1] + 1]
                elif action == Directions.SOUTH:
                    next_position = [current_position[0], current_position[1] - 1]
                else:
                    next_position = [current_position[0], current_position[1]]
                    
                if manhattanDistance(next_position, closest_ghost_coordinates) > max_dist:
                    max_dist_move = action
                    max_dist = manhattanDistance(next_position, closest_ghost_coordinates)
                    print(max_dist_move)
            return max_dist_move
                
        
    
    def getAction(self, state):
        #print ("\nLocation: " + str(state.getPacmanPosition()))
        #print ("Actions available: " + str(state.getLegalPacmanActions()))
        action = self.parse_chromosome(state)
        return action
        

In [None]:
seed = 36516285968924
random.seed(seed)

input_str = "-l mediumClassic"
args = readCommand(input_str.split())
agent = MyAgent()
args['pacman'] = agent
layout = args['layout']

list_of_cells, dict_of_cells = get_list_and_dict_of_cells(layout)


root = Node()
c1 = Node()
c2 = Node()
c3 = Node()
c4 = Node()
root.nature = 'Ifle'
c1.nature = 'Data'
c1.subnature = 'DistToActiveGhost'
c2.nature = 'Data'
c2.subnature = 'Constant'
c2.value = 2
c3.nature = 'Action'
c3.subnature = 'EscapeFromGhost'
c4.nature = 'Action'
c4.subnature = 'GetFood'
root.children = np.array([c1,c2,c3,c4])

agent.chromosome = root

greedy_moves = dict([])

runGames(**args)


- MDP formutation:
Environment: labirinto do pacman

States: (conjunto de features, taken from article [x])
    - grid do labirinto
    - posições das comidas
    - posições dos fantasmas ativos
    - posições dos fantasmas debilitados
    - número de fantasmas ativos a um passo de distância
    - número de fantasmas debilitados a um passo de distância
    - distância para a comida mais próxima
    - está comendo comida (0/1) [?]

Ações:
    - NORTH, SOUTH, WEST, EAST, STOP
    
Reward funciton:
    - 1: quando come uma comida;
    - 10: quando come uma pílula;
    - 100: quando come um fantasma debilitado;
    - -200: quando é comido por um fantasma.

- Discretization model:


- number of training episodes:
começar com 1000

- Stop criteria:
partida finalizada: pacman morre ou come tudo

- learning rate and other parameters:
alpha = 0.5
gamma = 0.8
epsilon = 0.1



In [42]:
m = n = None
#class Qstate(state):
    
#   def __init__():
#       self.id 
#       self.posFood 
#       self.posGhost 
#      self.nActiveGhost = 0
#       self.nScaredGhost = 0
#       self.dist_closest_food = 0
#       self.pos_closest_food = 0
        
class RLAgent(Agent):
    qtable = {}
    alpha = 0.5
    gamma = 0.5
    epsilon = 0.1
        
        
    def update(self, state, action, qstate):
        global m
        global n

        future_state = state.generatePacmanSuccessor(action)
        reward = future_state.getScore() - state.getScore()        
        if future_state.getScore() >= 9 + state.getScore():
            reward = 10
        if future_state.getScore() >= 100 + state.getScore():
            reward = 100
        if future_state.isLose():
            reward = -200
        
        future_qstate = generate_qstate_key(future_state,m,n)
        future_action = self.getBestActionFromQtable(future_state, future_qstate)
        print("future state: ", future_state.getScore(), future_state.getPacmanPosition())
        max_future = self.qtable[(future_qstate,future_action)]
        
        old_value = self.qtable[(qstate,action)]
        new_value = old_value + self.alpha*(reward + self.gamma*max_future - old_value)
        self.qtable[(qstate,action)] = new_value
    
    def getBestActionFromQtable(self, state, qstate):
        best_action = None
        max_qvalue = -math.inf
        
        legalActions = state.getLegalPacmanActions()
        for action in legalActions:
            if (qstate,action) not in self.qtable:
                self.qtable[(qstate,action)] = 0
            if self.qtable[(qstate,action)] > max_qvalue:
                max_qvalue = self.qtable[(qstate,action)]
                best_action = action
            
        return best_action
    
    def getQAction(self, state, qstate):
        legalActions = state.getLegalPacmanActions()
        
        # epsilon-greedy policy
        p = random.random()
        if p < self.epsilon:
            action = random.choice(legalActions)
        else:
            action = self.getBestActionFromQtable(state, qstate)
        
        print("Location: ", state.getPacmanPosition())
        print("Actions available: ", state.getLegalPacmanActions())
        print("Action taken: ", action)
        print("Score: ", state.getScore())
        return action
   
    
    def getAction(self, state):
        qstate = generate_qstate_key(state, m, n)
        print(state.getPacmanPosition())
        print(qstate)
        action = self.getQAction(state, qstate)
        self.update(state, action, qstate)
        print(action)
        return action
        
        
        
def get_qstate_key_char(state, x, y):
    if (x,y) in state.getGhostPositions():
        c = 'A' # fantasma ativo
        for i in (1,range(state.getNumAgents())):
            if (x,y) == state.getGhostPosition(i) and state.data.agentStates[i].scaredTimer > 1:
                c = 'S' # fantasma assustado
        return c
    elif (x,y) in state.getCapsules():
        return 'C'
    elif (state.hasFood(x,y)):
        return 'F'
    elif (state.hasWall(x,y)):
        return 'W'
    else:
        return 'E'

def generate_qstate_key(state, m, n):
    key = ['O','O','O','O','O','O','O','O']
    pacman = list(state.getPacmanPosition())
    
    # canto superior esquerdo key[0]
    x = pacman[0]-1
    y = pacman[1]+1
    if x >= 1 and y <= m:
        key[0] = get_qstate_key_char(state, x, y)
    
    # meio superior key[1]
    x = pacman[0]
    y = pacman[1]+1
    if y <= m:
        key[1] = get_qstate_key_char(state, x, y)
        
    # canto superior direito key[2]
    x = pacman[0]+1
    y = pacman[1]+1
    if x <= n and y <= m:
        key[2] = get_qstate_key_char(state, x, y)
    
    # meio esquerdo key[3]
    x = pacman[0]-1
    y = pacman[1]
    if x >= 1:
        key[3] = get_qstate_key_char(state, x, y)

    # meio direito key[4]
    x = pacman[0]+1
    y = pacman[1]
    if x <= n:
        key[4] = get_qstate_key_char(state, x, y)
    
    # canto inferior esquerdo key[5]
    x = pacman[0]-1
    y = pacman[1]-1
    if x >= 1 and y >=1:
        key[5] = get_qstate_key_char(state, x, y)
        
    # meio inferior key[6]
    x = pacman[0]
    y = pacman[1]-1
    if y >= 1:
        key[6] = get_qstate_key_char(state, x, y)
    
    # canto inferior direito key[7]
    x = pacman[0]+1
    y = pacman[1]-1
    if x <= n and y >= 1:
        key[7] = get_qstate_key_char(state, x, y)
    
    return ''.join(key)
    
    
def RL_train_agent(layout, alpha, gamma, epsilon, n_episodes):
    # implementar o treinamento aqui, de acordo com os parâmetros:
    alpha = 0.5
    gamma = 0.5
    epsilon = 0.5
    n_episodes = 10
    

In [43]:
seed = 36516285968924
random.seed(seed)

input_str = "-l mediumClassic"
args = readCommand(input_str.split())
agent = RLAgent()
args['pacman'] = agent
layout = args['layout']

qtable_1 = {}
m = layout.width
n = layout.height
n_episodes = 10
for i in range(0,n_episodes):
    agent = RLAgent()
    agent.qtable = qtable_1
    games = runGames(**args)
    print(games[0])


#qtable_2 = {}
#n_episodes = 100
#for i in n_episodes:
#    runGames(**args)


#qtable_3 = {}
#n_episodes = 100
#for i in n_episodes:
#    runGames(**args)


(9, 1)
WWWFFOOO
Location:  (9, 1)
Actions available:  ['East', 'West', 'Stop']
Action taken:  East
Score:  0.0
future state:  9.0 (10, 1)
East
(10, 1)
WWWEFOOO
Location:  (10, 1)
Actions available:  ['East', 'West', 'Stop']
Action taken:  Stop
Score:  9.0
future state:  8.0 (10, 1)
Stop
(10, 1)
WWWEFOOO
Location:  (10, 1)
Actions available:  ['East', 'West', 'Stop']
Action taken:  East
Score:  8.0
future state:  17.0 (11, 1)
East
(11, 1)
WWOEOOOO
Location:  (11, 1)
Actions available:  ['East', 'West', 'Stop']
Action taken:  East
Score:  17.0
future state:  26.0 (12, 1)
East
(12, 1)
WWOEOOOO
Location:  (12, 1)
Actions available:  ['East', 'West', 'Stop']
Action taken:  East
Score:  26.0
future state:  35.0 (13, 1)
East
(13, 1)
WFOEOOOO
Location:  (13, 1)
Actions available:  ['North', 'West', 'Stop']
Action taken:  North
Score:  35.0
future state:  44.0 (13, 2)
North
(13, 2)
FFOWOEEO
Location:  (13, 2)
Actions available:  ['North', 'South', 'Stop']
Action taken:  North
Score:  44.0
futur

TypeError: list indices must be integers or slices, not range