# MCTS performance estimation 

In [None]:
# Clone the mathematico-ai repository into the local directory `gh`.
!git clone https://github.com/balgot/mathematico-ai.git gh --
# Inside the clone, switch to the `structure` branch and list its contents.
!cd gh && git checkout structure && ls
# List the working directory to confirm that `gh` now exists.
!ls

Cloning into 'gh'...
remote: Enumerating objects: 172, done.[K
remote: Counting objects: 100% (172/172), done.[K
remote: Compressing objects: 100% (112/112), done.[K
remote: Total 172 (delta 46), reused 165 (delta 45), pack-reused 0[K
Receiving objects: 100% (172/172), 1.24 MiB | 19.02 MiB/s, done.
Resolving deltas: 100% (46/46), done.
Branch 'structure' set up to track remote branch 'structure' from 'origin'.
Switched to a new branch 'structure'
notebooks  pycallgraph.png  README.md  script.py  src  TODO
gh  sample_data


In [None]:
%%capture
%load_ext autoreload
%autoreload 2

%pip install -Uq pip
%pip install install --quiet 'git+https://github.com/balgot/mathematico.git#egg=mathematico&subdirectory=game'
%pip install -q tqdm

import os, sys
sys.path.append(os.path.abspath(os.path.join("./gh/")))
sys.path.append(os.path.abspath(os.path.join("./gh/src")))
sys.path.append(os.path.abspath(os.path.join("./gh/notebooks")))

import mathematico
from src.utils import mcts_old
from agent import MathematicoState, MctsPlayer

import random
import statistics
from copy import deepcopy
import time

from tqdm.notebook import trange, tqdm

In [None]:
def random_policy(state: MathematicoState) -> float:
    """Estimate the value of ``state`` with a single random play-out.

    The play-out runs on a deep copy, so the caller's state is not modified.
    The current number plus every card still counted in ``state.deck`` are
    shuffled; cards are then placed one by one, each time using an action
    drawn uniformly from ``state.get_possible_actions()``, until the board
    reports 25 occupied cells.

    Args:
        state: position to roll out from; its ``number``, ``deck`` and
            ``board`` attributes are read.

    Returns:
        ``board.score()`` of the completed board of the copied state.

    Raises:
        AssertionError: if the board is still incomplete after 30 placements.
    """
    state = deepcopy(state)
    # Remove one copy of the current number from the deck counts, then expand
    # the counts into a flat list of cards with the current number added back.
    state.deck[state.number] -= 1
    cards = [state.number] + [k for k, v in state.deck.items() for _ in range(v)]
    # NOTE(review): the current number is shuffled together with the other
    # cards, so it is not necessarily the first one placed - confirm intended.
    random.shuffle(cards)
    for i in range(30):
        if state.board.occupied_cells == 25:
            return state.board.score()
        actions = state.get_possible_actions()
        move = random.choice(actions)
        state.board.make_move(move, cards[i])
    # Explicit raise rather than `assert False`: assert statements are removed
    # under `python -O`, which would let this function silently return None.
    raise AssertionError("random_policy: board not completed within 30 placements")

In [None]:
# MCTS iteration budgets to benchmark; one agent is evaluated per entry.
SIMULS = [
    10, 50, 120, 250, 340,
    600, 1_000, 5_000, 10_000,
]

# Result accumulators, filled in the same order as SIMULS:
#   means - mean of results
#   stdev - stdev of results
#   times - mean time per game
#   score - list of scores
means, stdev, times, score = [], [], [], []


def _test_performance(agent, rounds, verbose=False, seed=0):
    """Play ``rounds`` single-player games with ``agent`` and summarise them.

    Game ``i`` is created as ``mathematico.Mathematico(seed=seed + i)``, so two
    calls with the same ``seed`` and ``rounds`` use the same game seeds.

    Args:
        agent: player registered with the arena; ``reset()`` is called on it
            before every game.
        rounds: number of games to play.
        verbose: forwarded to ``game.play``.
        seed: seed of the first game; later games use consecutive seeds.

    Returns:
        Tuple ``(mean, std, tim, scores)``: mean score, sample standard
        deviation of the scores (``0.0`` when fewer than two games were
        played), mean duration of one game in seconds, and the list of
        per-game scores.

    Raises:
        statistics.StatisticsError: if no game was played (``rounds`` < 1).
    """

    # Per the original author's note this mirrors the stock arena loop, but
    # with a tqdm progress bar, a different seed per game, and results kept
    # for one player only.
    class _Arena(mathematico.Arena):
        def run(self, rounds, seed=0):
            """Return ``(scores, durations)`` with one entry per game."""
            data = []
            time_data = []

            for i in trange(rounds, desc="Testing", leave=None, position=1):
                game = mathematico.Mathematico(seed=seed + i)
                for player in self.players:
                    player.reset()
                    game.add_player(player)

                # perf_counter is monotonic, so the measured interval cannot
                # be distorted by system clock adjustments.
                start = time.perf_counter()  # in seconds
                res = game.play(verbose=verbose)
                duration = time.perf_counter() - start  # in seconds
                data.append(res[0])
                time_data.append(duration)
            return data, time_data

    arena = _Arena()
    arena.add_player(agent)
    # Local name differs from the notebook-level `times` list to avoid shadowing.
    scores, durations = arena.run(rounds=rounds, seed=seed)

    mean = statistics.mean(scores)
    # statistics.stdev needs at least two data points.
    std = statistics.stdev(scores) if len(scores) > 1 else 0.0
    tim = statistics.mean(durations)
    return mean, std, tim, scores


# Evaluate one MCTS agent per iteration budget (20 games each) and append its
# summary statistics to the notebook-level result lists.
pbar = tqdm(SIMULS, desc="Players")
for simul_len in pbar:
    pbar.set_description(f"Simulations: {simul_len}")
    _agent = MctsPlayer(None, max_iters=simul_len, policy=random_policy)
    m, s, t, ss = _test_performance(_agent, 20)
    for _series, _value in zip((means, stdev, times, score), (m, s, t, ss)):
        _series.append(_value)

Players:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

Testing:   0%|          | 0/20 [00:00<?, ?it/s]

In [None]:
# borrowed from chatGPT
%pip install -q plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Define data
means = np.array(means)
stdevs = np.array(stdev)
times = np.array(times)
resources = np.array(SIMULS[:len(means)])

# Fit a linear model to the data
polyfit = np.polyfit(np.log10(resources), means, 1)
x_fit = np.linspace(1, 5, 50)
y_fit = np.polyval(polyfit, x_fit)

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add score scatter trace with error bars
fig.add_trace(go.Scatter(
    x=resources,
    y=means,
    name='Score',
    mode='lines+markers',
    line=dict(color='rgba(255, 165, 0, 1)', width=2),
    error_y=dict(
        type='data',
        symmetric=True,
        array=stdevs,
        thickness=1.5,
        width=3,
        color='rgba(255, 165, 0, 1)'
    )
))

# Add time line trace
fig.add_trace(go.Scatter(
    x=resources,
    y=times,
    name='Time',
    line=dict(color='green', width=2),
    yaxis='y2'
))

# Update layout
fig.update_layout(
    title='MCTS performance vs Simulation Length',
    xaxis_title='Per-move Simulation Length',
    xaxis_type='log',
    yaxis_title='Score',
    yaxis2_title='Time (s)',
    yaxis=dict(
        gridcolor='rgba(255, 255, 255, 0.2)',
        zerolinecolor='rgba(255, 255, 255, 0.2)',
        showgrid=True,
        showline=True
    ),
    yaxis2=dict(
        overlaying='y',
        side='right',
        gridcolor='rgba(255, 255, 255, 0.2)',
        zerolinecolor='rgba(255, 255, 255, 0.2)',
        showgrid=True,
        showline=True
    ),
    plot_bgcolor='rgba(255, 255, 255, 1)',
    paper_bgcolor='rgba(255, 255, 255, 1)',
    legend=dict(
        x=0.0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0.5)',
        bordercolor='rgba(0, 0, 1, 0)',
    )
)

fig.update_traces(selector=dict(type='scatter', name='Score'), mode='lines+markers')

fig.show()

[0m

In [None]:
from pprint import pprint

# Pretty-print every collected series, in the order means, stdev, times, score.
for _series in (means, stdev, times, score):
    pprint(_series)

array([ 84.5, 116. , 122. , 126. , 153. , 158. , 168.5, 226.5, 232. ])
[26.650762801367186,
 25.422741580594415,
 30.539017321246085,
 43.09109078757647,
 63.503004487562386,
 50.63803438440112,
 65.07283368818763,
 69.53037957763877,
 82.11673334185208]
array([5.82238054e-01, 3.23521981e+00, 6.87274168e+00, 1.45967200e+01,
       1.95996380e+01, 3.46485239e+01, 5.78208783e+01, 3.06127690e+02,
       6.45956584e+02])
[[90,
  100,
  70,
  80,
  50,
  110,
  120,
  70,
  40,
  90,
  40,
  90,
  100,
  90,
  100,
  150,
  80,
  90,
  60,
  70],
 [110,
  90,
  110,
  70,
  100,
  150,
  140,
  110,
  90,
  100,
  140,
  150,
  130,
  120,
  100,
  160,
  80,
  140,
  100,
  130],
 [110,
  140,
  120,
  110,
  210,
  140,
  130,
  90,
  110,
  90,
  80,
  130,
  110,
  110,
  80,
  170,
  130,
  110,
  130,
  140],
 [150,
  140,
  130,
  120,
  280,
  110,
  100,
  110,
  120,
  130,
  120,
  120,
  90,
  100,
  110,
  160,
  90,
  140,
  60,
  140],
 [100,
  160,
  180,
  100,
  70,
  240,

In [None]:
# means=array([ 84.5, 116. , 122. , 126. , 153. , 158. , 168.5, 226.5, 232. ])
# stdev=[26.650762801367186, 25.422741580594415, 30.539017321246085, 43.09109078757647, 63.503004487562386, 50.63803438440112, 65.07283368818763, 69.53037957763877, 82.11673334185208]
# times=array([5.82238054e-01, 3.23521981e+00, 6.87274168e+00, 1.45967200e+01, 1.95996380e+01, 3.46485239e+01, 5.78208783e+01, 3.06127690e+02, 6.45956584e+02])
"""
score=[
    [90, 100, 70, 80, 50, 110, 120, 70, 40, 90, 40, 90, 100, 90, 100, 150, 80, 90, 60, 70], 
    [110, 90, 110, 70, 100, 150, 140, 110, 90, 100, 140, 150, 130, 120, 100, 160, 80, 140, 100, 130], 
    [110, 140, 120, 110, 210, 140, 130, 90, 110, 90, 80, 130, 110, 110, 80, 170, 130, 110, 130, 140], 
    [150, 140, 130, 120, 280, 110, 100, 110, 120, 130, 120, 120, 90, 100, 110, 160, 90, 140, 60, 140], 
    [100, 160, 180, 100, 70, 240, 90, 180, 140, 250, 110, 100, 100, 170, 120, 270, 100, 280, 150, 150], 
    [140, 170, 150, 90, 120, 150, 100, 150, 130, 180, 160, 130, 160, 160, 150, 270, 130, 160, 310, 150], 
    [130, 240, 160, 150, 160, 120, 110, 130, 130, 240, 120, 160, 190, 140, 150, 130, 140, 390, 230, 150], 
    [160, 200, 210, 150, 300, 260, 190, 170, 210, 220, 170, 200, 200, 260, 170, 260, 340, 220, 440, 200], 
    [290, 170, 270, 170, 210, 290, 120, 160, 230, 170, 300, 200, 310, 150, 480, 190, 210, 170, 290, 260]]
"""
# A bare expression is echoed only when it is the last statement of a cell, so
# print each series explicitly to obtain all four copy-pastable assignments.
for _line in (f"{means=}", f"{stdev=}", f"{times=}", f"{score=}"):
    print(_line)

'score=[[90, 100, 70, 80, 50, 110, 120, 70, 40, 90, 40, 90, 100, 90, 100, 150, 80, 90, 60, 70], [110, 90, 110, 70, 100, 150, 140, 110, 90, 100, 140, 150, 130, 120, 100, 160, 80, 140, 100, 130], [110, 140, 120, 110, 210, 140, 130, 90, 110, 90, 80, 130, 110, 110, 80, 170, 130, 110, 130, 140], [150, 140, 130, 120, 280, 110, 100, 110, 120, 130, 120, 120, 90, 100, 110, 160, 90, 140, 60, 140], [100, 160, 180, 100, 70, 240, 90, 180, 140, 250, 110, 100, 100, 170, 120, 270, 100, 280, 150, 150], [140, 170, 150, 90, 120, 150, 100, 150, 130, 180, 160, 130, 160, 160, 150, 270, 130, 160, 310, 150], [130, 240, 160, 150, 160, 120, 110, 130, 130, 240, 120, 160, 190, 140, 150, 130, 140, 390, 230, 150], [160, 200, 210, 150, 300, 260, 190, 170, 210, 220, 170, 200, 200, 260, 170, 260, 340, 220, 440, 200], [290, 170, 270, 170, 210, 290, 120, 160, 230, 170, 300, 200, 310, 150, 480, 190, 210, 170, 290, 260]]'