In [1]:
%cd ../search/search
#! python pacman.py --layout originalClassic

/home/gabriel/Documentos/Tarefas/Universidade/MC886/projects_MachineLearning/project3_ra214129_ra216179/search/search


In [2]:
from pacman import runGames, readCommand
from game import Agent, Directions
from util import manhattanDistance
import numpy as np
import random
import json

In [3]:
random.seed(7)

In [4]:
def getState(state):
    currentState = (state.getPacmanPosition(),Qlearn.getNearestFoodDis(state),state.getNumFood() + len(state.getCapsules()),Qlearn.nearestGhostPos(state))
    return currentState

In [36]:
class Qlearn(Agent):
    def __init__(self, alpha,gamma,epsilon = 1,Qtable = {}):
        self.Qtable = Qtable
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.score = 0
        self.action = None
        self.currentState = None
    
    def getQvalue(self,state,action):
        pair = str((state,action))
        return self.Qtable.get(pair,0.0)
    
    def getAction(self, state):
        actions = state.getLegalPacmanActions()
        currentState = getState(state)
        action = self.getBestAction(currentState,actions)
        state2 = state.generatePacmanSuccessor(action)
        nextState = getState(state2)
        if state2.isLose():
            reinforcement = -20000
        elif state2.isWin():
            reinforcement = 20000
        else:
            reinforcement = self.getReward(currentState,nextState,state2.getScore())
        self.learn(currentState,nextState,action,actions,reinforcement)
        self.action = action
        self.currentState = currentState
        return action
        
        
    def nearestGhostPos(state):
        ghostList = state.getGhostPositions()
        nearestDistance = Qlearn.getNearGhost(state.getPacmanPosition(),ghostList)
        return nearestDistance

    def getNearGhost(pacman,ghosts):
        minDis = float("inf")
        minPos = -1
        for i in range(len(ghosts)):
            distance = manhattanDistance(pacman,ghosts[i])
            if distance < minDis:
                minDis = distance
                minPos = i
        if minPos > -1:
            return ghosts[minPos]
        else:
            return (-1,-1)

    def getNearestFoodDis(state):
        posList = Qlearn.getFoodPos(state.getFood())
        posList = posList + state.getCapsules()
        minDis = minDis = float("inf")
        #minPos = (-1,-1)
        pacPos = state.getPacmanPosition()
        for i in posList:
            distance = manhattanDistance(pacPos,i)
            if distance < minDis:
                minDis = distance
                #minPos = i
        return minDis


    def getFoodPos(grid):
        posList = []
        gridList = grid.asList()
        for i in range(len(gridList)):
            for j in range(len(gridList[i])):
                if gridList[i][j]:
                    posList.append((i,j))
        return posList

    def getBestAction(self,currentState,actions):
        if random.random() < self.epsilon:
            return random.choice(actions)
        maxValue = float("-inf")
        maxAction = []
        for i in actions:
            value = self.getQvalue(currentState,i)
            if value > maxValue:
                maxValue = value
                maxAction = [i]
            if value == maxValue:
                maxAction.append(i)
        #print(maxValue)
        #print(maxAction)
        return random.choice(maxAction)

    def learn(self,currentState,nextState,action,actions,reinforcement):
        nextActions = [self.getQvalue(nextState,a) for a in actions]
        if len(nextActions) > 0:
            newQ = max(nextActions)
        else:
            newQ = 0
        currentQ = self.getQvalue(currentState,action)
        self.Qtable[str((currentState,action))] = currentQ + self.alpha * (reinforcement + (self.gamma*newQ) - currentQ)

    def setEpsilon(self,epsilon):
        self.epsilon = epsilon

    def serialize(self,path):
        try:
            f = open(path,"w")
            json.dump(self.Qtable,f)
            f.close()
        except:
            raise Exception("You've got an error")

    def desserialize(self,path):
        try:
            f = open(path,"r")
            self.Qtable = json.load(f)
            f.close()
        except:
            raise Exception("You've got an error")
    
    def getReward(self,state1,state2,score):
        pacGhostDistance1 = manhattanDistance(state1[0],state1[3])
        pacGhostDistance2 = manhattanDistance(state2[0],state2[3])
        #fDis1 = manhattanDistance(state1[0],state1[1])
        #fDis2 = manhattanDistance(state2[0],state2[1])
        eatenFood = state2[2] - state1[2]
        ghostDistance = pacGhostDistance2 - pacGhostDistance1
        foodDistance = state2[1] - state1[1]
        if ghostDistance > 0:
            ghostReward = ghostDistance*200
        else:
            ghostReward = 200*ghostDistance
        if foodDistance < 0:
            foodReward = -400 * foodDistance
        else:
            foodReward = 0
        if eatenFood > 0:
            eatReward = 400*eatenFood
        else:
            eatReward = 0
        scoreDifference = score - self.score
        if scoreDifference > 0:
            scoreReward = scoreDifference
        else:
            scoreReward = scoreDifference*1.25
        return ghostReward + foodReward + eatReward + scoreReward
    
    def win(self,terminal):
        self.learn(self.currentState,getState(terminal.state),self.action,terminal.state.getLegalPacmanActions(),20000)
    
    def lose(self,terminal):
        self.learn(self.currentState,getState(terminal.state),self.action,terminal.state.getLegalPacmanActions(),-20000)
            

Best:  
GhostReward = 200/200  
FoodReward = -400/0  
EatReward = 400/0  
ScoreReward = 1/0.75  
Alpha = 0.85  
Gamma = 0.3  

First Win: 614  
Total win: 53  
Average: -342.6344

In [25]:
agente = Qlearn(0.75,0.4,epsilon = 1)
args = readCommand(["-q", "--pacman", "LeftTurnAgent","--layout","smallClassic"])
args["pacman"] = agente

['-q', '--pacman', 'LeftTurnAgent', '--layout', 'smallClassic']


In [26]:
a = runGames(**args)
print(a)

[<game.Game object at 0x000001EDDAF3A2C8>]


In [13]:
args = readCommand(["-q", "--pacman", "LeftTurnAgent","--layout","smallClassic"])

['-q', '--pacman', 'LeftTurnAgent', '--layout', 'smallClassic']


In [38]:
a = []

In [39]:
agente = Qlearn(0.85,0.3,epsilon = 1)
args["pacman"] = agente

In [40]:
victorys = []
k = 40000
summ = 0
for i in range(k):
    agente.setEpsilon(1/(i+1))
    a.append(runGames(**args))
    if a[i][0].state.isLose():
        agente.lose(a[i][0])
    elif a[i][0].state.isWin():
        agente.win(a[i][0])
        victorys.append(a[i])
        print(i)
    summ += a[i][0].state.getScore()

Pacman emerges victorious! Score: 438
25563
Pacman emerges victorious! Score: 1026
26053
Pacman emerges victorious! Score: 636
26636
Pacman emerges victorious! Score: 858
27819
Pacman emerges victorious! Score: 585
31369
Pacman emerges victorious! Score: 1151
33530
Pacman emerges victorious! Score: 979
36611
Pacman emerges victorious! Score: 779
38505
Pacman emerges victorious! Score: 686
39731


In [None]:
for i in range(50):
    print(a[len(a)-1-i][0].state.isWin())
    print(a[len(a)-1-i][0].state.getScore())

In [41]:
agente.serialize("../../notebooks/results/a085-g03-d080721-2.json")

In [42]:
print(len(victorys))
print(summ/k)

9
-354.700875


In [19]:
class smartPacman(Agent):
    def __init__(self, path):
        self.desserialize(path)
    
    def getQvalue(self,state,action):
        pair = str((state,action))
        return self.Qtable.get(pair,0.0)
    
    def getAction(self, state):
        actions = state.getLegalPacmanActions()
        currentState = getState(state)
        action = self.getBestAction(currentState,actions)
        return action
    
    def getBestAction(self,currentState,actions):
        maxValue = float("-inf")
        maxAction = []
        for i in actions:
            value = self.getQvalue(currentState,i)
            if value > maxValue:
                maxValue = value
                maxAction = [i]
            if value == maxValue:
                maxAction.append(i)
        #print(maxValue)
        #print(maxAction)
        return random.choice(maxAction)
    
    def desserialize(self,path):
        try:
            f = open(path,"r")
            self.Qtable = json.load(f)
            f.close()
        except:
            raise Exception("You've got an error")


In [30]:
#agenteEsperto = smartPacman(".../../notebooks/results/a085-g03-d080721.json")
agenteEsperto = smartPacman("../../notebooks/results/a085-g03-d080721.json")
argsV = readCommand(["--pacman", "LeftTurnAgent","--layout","smallClassic"])
argsV["pacman"] = agenteEsperto

['--pacman', 'LeftTurnAgent', '--layout', 'smallClassic']


In [35]:
a = runGames(**argsV)
print(a[0].state.isWin())
print(a[0].state.getScore())

False
-454.0
