In [1]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


# Agentes para el juego 2048

In [2]:
from game import GameOf2048
import numpy as np
from random import choices, seed

## Búsqueda Montecarlo

El agente intenta predecir los puntajes obtenidos en cada movimiento recorriendo varios escenarios futuros. Más específicamente, se busca en dos movimientos hacia adelante y hasta tres escenarios nuevos por cada movimiento.

In [3]:
class DTAgent:
    # tries to do the move with the best expected best expected score.
    # since the state space is so huge,
    # we look ahead to some max depth.
    # the depth changes depending on how many tiles are numbered.
    # less tiles -> less risk of losing -> less lookahead to survive
    # more tiles -> less breadth to look at -> more depth to look at


    def __init__(self):
        self.cache = {}
        self.moves = 0
    
    def nextMove(self, board):
        # we assume the board can be moved
        moves = []
        for move in range(4):
            self.game = GameOf2048()
            # moves are ints,
            # this is explained in the GameOf2048 class definition

            moved, _ = self.game.transform(board, move)
            # numTiles = len(list(self.game.availableTiles(moved)))
            depth = 2 # min(14//numTiles+1, 4)

            if not np.array_equal(board, moved):
                self.totalScore = 0
                self.expectedPointsFrom(moved, depth)
                moves.append((move, self.totalScore))
            else:
                pass # not gonna use useless moves
        
        moves.sort(key=lambda x: x[1])
        return moves[-1][0]


    def expectedPointsFrom(self, board, depth, pastScore=0):
        key = str(board)
        if key in self.cache:
            self.totalScore += self.cache[key]
            return
        
        lost = self.game.verifyLoss(board)
        if depth == 0 or lost:
            self.totalScore += pastScore
            self.cache[str(board)] = pastScore
            return
        # else...

        for move in range(4):
            moved, points = self.game.transform(board, move)

            if np.array_equal(board, moved):
                continue # nothing to do, useless move
            else:
                tiles = list(self.game.availableTiles(moved))
                samples = min(len(tiles),3)

                for coord in choices(tiles, k=samples):
                    x, y = coord
                    
                    moved[x,y] = 4
                    self.expectedPointsFrom(moved, depth-1, points+pastScore)

                    moved[x,y] = 2
                    self.expectedPointsFrom(moved, depth-1, 9*(points+pastScore))
                    
                    moved[x,y] = 0
                    

### Prueba de efectitividad

Se realizan 50 experimentos en los que el agente comienza en un tablero nuevo y puede realizar hasta 200 movimientos.

In [13]:
env = GameOf2048()
agent = DTAgent()

seed(0)
points = []
steps = []
lost = []
maxTile = []

for _ in range(50):
    env.reset()
    
    total = 0
    
    stepAdded = False
    for step in range(200):
        if env.lost:
            steps.append(step+1)
            stepAdded=True
            break
        total += env.step(agent.nextMove(env.currentBoard))[1]
        
    if not stepAdded:
        steps.append(step+1)
    points.append(total)
    lost.append(env.lost)
    maxTile.append(np.max(env.currentBoard))

In [18]:
min(points),max(points),np.mean(points),np.median(points)

(1276.0, 2660.0, 2452.64, 2520.0)

La media y mediana de puntaje son muy cercanos al máximo puntaje.

In [15]:
len([i for i in lost if i])/len(lost)

0.12

En el 12% de los experimentos el juego se perdió.

In [16]:
max(maxTile)

256

La casilla más grande alcanzada fue 256.

La siguiente pieza de código abre una instancia del juego en Mozilla Firefox y permite que el agente juegue en ella.

In [24]:
from control2048 import Control

env.reset()
    
total = 0

controller = Control(sleepTime=0.2)

stepAdded = False
for step in range(600):
    if env.verifyLoss(controller.currentGrid):
        print(step+1)
        stepAdded=True
        break
    move = agent.nextMove(controller.currentGrid)
    controller.pressArrow(move)
    
if not stepAdded:
    print(step+1)
print(total)
print(env.verifyLoss(controller.currentGrid))
print(np.max(controller.currentGrid))

420
0
False
2
