In [1]:
from New2048 import Game2048 as _2048
import numpy as np
import New2048

In [2]:
# Smoke test: build a game, apply a single adversary action and print the board.
# The argument 4 is presumably the board side length (the printed grid is 4x4) — TODO confirm.
game = _2048(4)
# NOTE(review): `start_game` is referenced without parentheses. If it is a plain
# method rather than a property, this statement does nothing — confirm the
# intent and call it if the game really needs to be started here.
game.start_game
game.playAdversaryAction()
#game.playRandUser()
print(game)

+------+------+------+------+
|  2   |      |      |      |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+


In [3]:
#New2048.main()

# BASELINE - Monte Carlo

In [4]:
# Starter Code: MCTS Implementation
import time


class Tree():
    """One node of the Monte Carlo search tree; each node owns a game state.

    A root node is created with ``Tree(start_grid=grid)`` and keeps a reference
    to that very grid object (no copy is made). A child node is created with
    ``Tree(parent=node, move=m)``: it copies the parent's grid and advances the
    copy by one action.

    Attributes set by the constructor:
        parent: the parent node, or None for the root.
        move: the move associated with this node (always None for the root).
        grid: the game state held by this node.
        values: list of simulation results recorded on this node.
        n: visit counter, starts at 0.
        unexplored_moves: set of moves for which no child exists yet.
        children: set of child nodes created so far.

    NOTE(review): on an adversary turn the grid is advanced with
    ``playAdversaryAction()`` and ``move`` is not passed to it, so the stored
    move may not be the action that was actually applied — confirm against
    the New2048 implementation.
    NOTE(review): this class relies on the grid's ``play_state`` to tell whose
    turn it is but never changes it itself; confirm the play*Action methods
    toggle it, otherwise every level of the tree belongs to the same player.
    """

    def __init__(self, *, start_grid=None, parent=None, move=None):
        if parent is not None:
            # Child node: work on a private copy so the parent's state is untouched.
            state = parent.grid.copy()
            self.grid = state
            if state.play_state:
                state.playUserAction(move)
            else:
                state.playAdversaryAction()
            self.move = move
        else:
            # Root node: wraps the caller's grid directly; any `move` is ignored.
            self.grid = start_grid
            self.move = None
        self.parent = parent

        # Search statistics, filled in while results are propagated back.
        self.n = 0
        self.values = []

        # Moves still waiting to be turned into children; terminal nodes have none.
        if self.is_terminal_state:
            pending = ()
        elif self.grid.play_state:
            pending = self.grid.get_user_actions()
        else:
            pending = self.grid.getAdverseryActions()
        self.unexplored_moves = set(pending)
        self.children = set()

    @property
    def fully_expanded(self):
        """True once no unexplored move is left on this node."""
        return not self.unexplored_moves

    @property
    def is_terminal_state(self):
        """True when the grid reports a winner, i.e. ``grid.winner`` is not None."""
        outcome = self.grid.winner
        return outcome is not None


def monte_carlo_tree_search(start_grid, num_iterations=1000):
    """Run Monte Carlo tree search from ``start_grid`` and return the chosen move.

    Args:
        start_grid: game state to search from; it becomes the root node's grid
            and is also passed to every rollout as the reference state.
        num_iterations: number of select / simulate / back-propagate rounds.

    Returns:
        The ``move`` attribute of the root child selected by ``best_child``.

    Note:
        If the root ends up with no children (for example ``num_iterations``
        is 0 or the start state is already terminal), ``best_child`` calls
        ``max`` on an empty collection and a ValueError propagates.
    """
    search_root = Tree(start_grid=start_grid)

    for _iteration in range(num_iterations):
        # Selection / expansion, then simulation, then statistics update.
        selected = traverse(search_root)
        outcome = rollout(selected, start_grid)
        backpropagate(selected, outcome)

    # Search budget exhausted: report the move of the preferred root child.
    winner = best_child(search_root)
    return winner.move

def best_child(node):
    """Return the child of ``node`` with the largest visit count ``n``.

    Raises ValueError (from ``max``) when ``node`` has no children.
    """
    def visit_count(candidate):
        return candidate.n

    return max(node.children, key=visit_count)


def best_uct(node, C=5):
    """Pick the child of ``node`` with the highest UCB/UCT score.

    The score of a child is::

        sum(child.values) / child.n + C * sqrt(log(node.n) / child.n)

    i.e. its average recorded result plus an exploration bonus that shrinks
    as the child is visited more often.

    Args:
        node: a node exposing ``children`` (iterable of nodes) and ``n``.
        C: exploration weight; larger values favour rarely visited children.

    Returns:
        The child with the strictly largest score (the first one encountered
        wins ties).

    Raises:
        ValueError: if ``node`` has no children, or if no child produced a
            comparable score (e.g. every score is NaN). Previously this case
            fell through a bare ``except`` and surfaced as an
            UnboundLocalError.
    """
    if not node.children:
        raise ValueError(
            "best_uct() called on a node without children; grid:\n"
            f"{getattr(node, 'grid', None)}"
        )

    # log(node.n) does not depend on the child, so compute it once.
    log_parent_visits = np.log(node.n)

    best_score = float("-inf")
    best_node = None
    for child in node.children:
        exploitation = np.sum(child.values) / child.n
        exploration = C * np.sqrt(log_parent_visits / child.n)
        score = exploitation + exploration
        if score > best_score:
            best_score = score
            best_node = child

    if best_node is None:
        # Every comparison failed (NaN scores); fail loudly rather than
        # returning an undefined result.
        raise ValueError(
            "best_uct() could not rank any child; grid:\n"
            f"{getattr(node, 'grid', None)}"
        )
    return best_node


def traverse(node):
    """Walk down from ``node`` and return the node to simulate from.

    While the current node is fully expanded and not terminal, descend to the
    child chosen by ``best_uct``. The walk ends in one of two ways:

    * a terminal node is reached, which is returned unchanged; or
    * a node with unexplored moves is reached, in which case one move is
      removed from its ``unexplored_moves``, a child is created for it, the
      child is added to the node's ``children`` and the child is returned.
    """
    current = node
    while not current.is_terminal_state:
        if not current.fully_expanded:
            # Expansion step: turn one pending move into a brand-new child.
            untried_move = current.unexplored_moves.pop()
            fresh_child = Tree(parent=current, move=untried_move)
            current.children.add(fresh_child)
            return fresh_child
        # Everything here has been tried at least once; follow the UCT choice.
        current = best_uct(current)

    # Terminal node: nothing left to expand.
    return current


def rollout(node, start_grid):
    """Simulate a random playout from ``node`` and score it for ``start_grid``.

    The playout runs on a copy of the node's grid, so the node itself is not
    modified. The value returned by ``play_random_moves_until_done`` is mapped
    to a score:

    * 0 when it equals 0,
    * 1 when it equals ``start_grid.play_state``,
    * -1 otherwise.
    """
    simulation = node.grid.copy()
    outcome = simulation.play_random_moves_until_done()

    if outcome == 0:
        return 0
    return 1 if outcome == start_grid.play_state else -1


def backpropagate(node, simulation_result):
    """Record ``simulation_result`` on ``node`` and on each of its ancestors.

    Every node on the path up to the root (followed through ``parent``) gets
    its visit counter ``n`` increased by one and the result appended to its
    ``values`` list. Passing ``None`` as ``node`` does nothing.
    """
    current = node
    while current is not None:
        current.n += 1
        current.values.append(simulation_result)
        current = current.parent
    

In [6]:
# Play one full game: on the user's turn the move is chosen by MCTS, otherwise
# the adversary acts. The loop ends as soon as the board reports a winner.
MCTS_ITERATIONS = 50  # search budget per user move

board = _2048(4)
tot_time = 0  # accumulated seconds spent inside the MCTS search
while board.winner is None:
    if board.play_state:
        # Time only the search itself. The elapsed interval is accumulated;
        # the previous code added the absolute start timestamp instead.
        stime = time.perf_counter()
        action = monte_carlo_tree_search(board, MCTS_ITERATIONS)
        tot_time += time.perf_counter() - stime
        board.playUserAction(action)
        board.play_state = False
        print(board)
    else:
        board.playAdversaryAction()
        board.play_state = True
    
    

+------+------+------+------+
|      |      | 2.0  |      |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
|      |      | 2.0  |      |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
|      | 2.0  | 4.0  |      |
+------+------+------+------+
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
| 2.0  |      |      |      |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
| 2.0  | 2.0  | 4.0  |      |
+------+------+------+------+
+------+------+------+------+
| 4.0  | 2.0  | 4.0  | 2.0  |
+------+------+------+------+
|      |      |      |      |
+------+------+------+------+
|      | 2.0  | 4.0  | 2.0  |
+------+--

+------+------+------+------+
| 8.0  | 2.0  | 16.0 | 8.0  |
+------+------+------+------+
| 32.0 |128.0 | 4.0  | 2.0  |
+------+------+------+------+
| 2.0  | 4.0  | 8.0  |      |
+------+------+------+------+
| 32.0 | 2.0  | 2.0  | 2.0  |
+------+------+------+------+
+------+------+------+------+
| 8.0  | 2.0  | 16.0 |      |
+------+------+------+------+
| 32.0 |128.0 | 4.0  | 8.0  |
+------+------+------+------+
| 2.0  | 4.0  | 8.0  | 2.0  |
+------+------+------+------+
| 32.0 | 2.0  | 2.0  | 4.0  |
+------+------+------+------+
+------+------+------+------+
| 8.0  | 2.0  | 16.0 | 2.0  |
+------+------+------+------+
| 32.0 |128.0 | 4.0  | 8.0  |
+------+------+------+------+
| 2.0  | 4.0  | 8.0  | 2.0  |
+------+------+------+------+
|      | 32.0 | 4.0  | 4.0  |
+------+------+------+------+
+------+------+------+------+
| 8.0  | 2.0  | 16.0 | 2.0  |
+------+------+------+------+
| 32.0 |128.0 | 4.0  | 8.0  |
+------+------+------+------+
| 2.0  | 4.0  | 8.0  | 2.0  |
+------+--

+------+------+------+------+
| 2.0  | 16.0 |128.0 |      |
+------+------+------+------+
| 4.0  | 64.0 | 8.0  | 2.0  |
+------+------+------+------+
| 8.0  | 16.0 | 32.0 | 8.0  |
+------+------+------+------+
| 4.0  | 32.0 | 8.0  | 8.0  |
+------+------+------+------+
+------+------+------+------+
| 2.0  | 16.0 |128.0 | 2.0  |
+------+------+------+------+
| 4.0  | 64.0 | 8.0  | 2.0  |
+------+------+------+------+
| 8.0  | 16.0 | 32.0 | 8.0  |
+------+------+------+------+
|      | 4.0  | 32.0 | 16.0 |
+------+------+------+------+
+------+------+------+------+
| 2.0  | 16.0 |      |      |
+------+------+------+------+
| 4.0  | 64.0 |128.0 | 4.0  |
+------+------+------+------+
| 8.0  | 16.0 | 8.0  | 8.0  |
+------+------+------+------+
| 4.0  | 4.0  | 64.0 | 16.0 |
+------+------+------+------+
+------+------+------+------+
| 2.0  | 16.0 | 2.0  |      |
+------+------+------+------+
| 4.0  | 64.0 |128.0 | 4.0  |
+------+------+------+------+
| 8.0  | 16.0 | 16.0 |      |
+------+--

+------+------+------+------+
|      |      | 4.0  | 8.0  |
+------+------+------+------+
| 4.0  | 16.0 | 32.0 | 8.0  |
+------+------+------+------+
| 32.0 |256.0 | 64.0 | 2.0  |
+------+------+------+------+
| 8.0  | 16.0 | 2.0  | 4.0  |
+------+------+------+------+
+------+------+------+------+
| 4.0  | 2.0  | 4.0  | 16.0 |
+------+------+------+------+
| 32.0 | 16.0 | 32.0 | 2.0  |
+------+------+------+------+
| 8.0  |256.0 | 64.0 | 4.0  |
+------+------+------+------+
|      | 16.0 | 2.0  |      |
+------+------+------+------+
+------+------+------+------+
|      | 2.0  | 4.0  | 16.0 |
+------+------+------+------+
| 4.0  | 16.0 | 32.0 | 2.0  |
+------+------+------+------+
| 32.0 |256.0 | 64.0 | 4.0  |
+------+------+------+------+
| 8.0  | 16.0 | 2.0  | 2.0  |
+------+------+------+------+
+------+------+------+------+
| 4.0  | 4.0  | 16.0 |      |
+------+------+------+------+
| 4.0  | 16.0 | 32.0 | 2.0  |
+------+------+------+------+
| 32.0 |256.0 | 64.0 | 4.0  |
+------+--

# TEST 1 - Temporal Difference Learning

# TEST 2 - n-Step Bootstrapping

# TEST 3 - TBD if we do

# TEST 4 - Adversarial 2048