# Playing the game using only the MCTS agent

In [1]:
%load_ext autoreload
%autoreload 2

## Install

In [2]:
%pip install -qU pip
%pip install -q seaborn pandas matplotlib
%pip install install --quiet 'git+https://github.com/balgot/mathematico.git#egg=mathematico&subdirectory=game'

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join('../')))


from mathematico import Arena, Board, Mathematico
from mathematico import Player, HumanPlayer, RandomPlayer, SimulationPlayer

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set()

from src.utils import mcts

## MCTS Setup

In [4]:
from typing import Tuple, Sequence, List
from copy import deepcopy
from mathematico import Board
from random import randrange
from collections import Counter


# The action type (row and col to play)
Action = Tuple[int, int]

# The card we are supposed to place on the board
Card = int

# For representing the available cards, card ↦ number of cards in the deck
Deck = dict[Card, int]


class MathematicoState(mcts.StateI):
    """
    State of the game, represented by the positions
    of the cards already placed on the board and the card
    to place.
    """

    def __init__(self, board: Board, number: Card, cards_left: 'Deck | None' = None):
        self.board: Board = board
        self.number: Card = number
        self.deck = cards_left

        # count the number of cards available
        if self.deck is None:
            self.deck = {i: 4 for i in range(1, 14)}
            for row in range(board.size):
                for col in range(board.size):
                    num = board.grid[row][col]
                    if num != 0:
                        self.deck[num] -= 1
                        if not self.deck[num]:
                            self.deck.pop(num)

    def get_possible_actions(self) -> Sequence[Action]:
        return list(self.board.possible_moves())

    def take_action(self, action: Action) -> 'list[tuple[mcts.StateI, float]]':
        cnt = sum(self.deck.values())
        res = []

        # find all possible states after playing card: self.number at position action
        for card, n in self.deck.items():
            board = deepcopy(self.board)
            board.make_move(action, self.number)  # take action that leads TO this state
            deck = self.deck.copy()  # enough for int ↦ int
            deck[self.number] -= 1
            if not deck[self.number]:
                deck.pop(self.number)
            res.append((MathematicoState(board, card, deck), n / cnt))
        return res

    def is_terminal(self) -> bool:
        return self.board.occupied_cells == self.board.size ** 2

    def get_reward(self):
        return self.board.score()



class MctsPlayerRandomRollout(Player):
    def __init__(self, max_time, max_iters):
        super().__init__()
        self._mcts = mcts.MCTS(time_limit=max_time, iters_limit=max_iters)
    
    def reset(self) -> None:
        self.board = Board()

    def move(self, number: int):
        state = MathematicoState(self.board, number)
        action, value = self._mcts.search(state)[0]
        # print(f"[info] Expecting value: {value}")        
        self.board.make_move(action, number)

## Single Game

In [5]:
import time
%pip install -q tqdm
from tqdm.notebook import trange

class _Arena(Arena):
    def run(self, rounds: int = 100, verbose: bool = True, seed = None):
        start = time.time()
        for _ in trange(rounds):
            # initialize a new game
            game = Mathematico(seed=seed)
            for player in self.players:
                player.reset()
                game.add_player(player)

            # play the game and collect rewards
            results = game.play(verbose=True)
            for idx, result in enumerate(results):
                self.results[idx].append(result)

        if verbose:
            total_time = time.time() - start
            print(f"Steps run: {rounds}\tElapsed time: {total_time}")

        return self.results

Note: you may need to restart the kernel to use updated packages.


In [20]:
import random
random.seed(0)

arena = _Arena()
SIMULS = [10, 50, 200]

for simuls in SIMULS:
    player = MctsPlayerRandomRollout(None, max_iters=simuls)
    arena.add_player(player)

In [21]:
%%time
print(arena.run(seed=1, rounds=1))

  0%|          | 0/1 [00:00<?, ?it/s]

Moves played:	[13]
Current card:	3
Move number:	1
Players:	[<__main__.MctsPlayerRandomRollout object at 0x7f197b18af10>, <__main__.MctsPlayerRandomRollout object at 0x7f197b18aa00>, <__main__.MctsPlayerRandomRollout object at 0x7f197b3d6340>]
Moves played:	[13, 3]
Current card:	10
Move number:	2
Players:	[<__main__.MctsPlayerRandomRollout object at 0x7f197b18af10>, <__main__.MctsPlayerRandomRollout object at 0x7f197b18aa00>, <__main__.MctsPlayerRandomRollout object at 0x7f197b3d6340>]
Moves played:	[13, 3, 10]
Current card:	6
Move number:	3
Players:	[<__main__.MctsPlayerRandomRollout object at 0x7f197b18af10>, <__main__.MctsPlayerRandomRollout object at 0x7f197b18aa00>, <__main__.MctsPlayerRandomRollout object at 0x7f197b3d6340>]
Moves played:	[13, 3, 10, 6]
Current card:	1
Move number:	4
Players:	[<__main__.MctsPlayerRandomRollout object at 0x7f197b18af10>, <__main__.MctsPlayerRandomRollout object at 0x7f197b18aa00>, <__main__.MctsPlayerRandomRollout object at 0x7f197b3d6340>]
Moves p

ValueError: Unknown combination of values: defaultdict(<class 'int'>, {6: 5})

In [None]:
for p, x in zip(arena.players, SIMULS):
    print("\n\n")
    print(f"# simulations: {x}")
    print("=================")
    print(p.board)
    print()




# simulations: 10
+--+--+--+--+--+
|13| 3|10| 6| 1|
+--+--+--+--+--+
|10| 5| 3| 9| 2|
+--+--+--+--+--+
| 8|13|11| 4| 6|
+--+--+--+--+--+
|12|12| 6|11|10|
+--+--+--+--+--+
| 4|13| 9| 6| 7|
+--+--+--+--+--+




# simulations: 300
+--+--+--+--+--+
|13| 3|10| 6| 1|
+--+--+--+--+--+
|10| 5| 3| 9| 2|
+--+--+--+--+--+
| 8|13|11| 4| 6|
+--+--+--+--+--+
|12|12| 6|11|10|
+--+--+--+--+--+
| 4|13| 9| 6| 7|
+--+--+--+--+--+

