In [1]:
# Change directory to the root of the project
import os 
# Step up three parent directories, one at a time (assumes the kernel starts
# three levels below the project root — TODO confirm for your setup).
# NOTE: the moves are relative, so re-running this cell climbs three more levels.
for _ in range(3):
    os.chdir('..')
print(f"Working directory: {os.getcwd()}")

Working directory: /Users/eohjelle/Documents/2025-dots-and-boxes/dots-and-boxes


In [2]:
# Experiment configuration
rows, cols = 5, 5        # board dimensions passed to DotsAndBoxesGameState
num_simulations = 100    # forwarded to MCTS(num_simulations=...)
num_rollouts = 100       # forwarded to MCTS(num_rollouts=...)
temperature = 1.0        # forwarded to MCTS(temperature=...)
num_examples = 10_000    # self-play continues until at least this many distinct states are collected



In [3]:
# Generate training data via self play

from applications.dots_and_boxes.game_state import DotsAndBoxesGameState
from core.implementations import MCTS
from core import TrainingExample
from typing import List, Set
import time
import random

# MCTS agent constructed on the empty board; search settings come from the
# variables cell.
agent = MCTS(
    DotsAndBoxesGameState(rows, cols),
    num_simulations=num_simulations,
    num_rollouts=num_rollouts,
    temperature=temperature,
)

# `collected_states` is used for membership tests (de-duplication); `states`
# holds the same states as a list so random.choice can pick a restart position.
collected_states: Set[DotsAndBoxesGameState] = set()
states: List[DotsAndBoxesGameState] = []
examples: List[TrainingExample] = []

# Interval timing uses a monotonic high-resolution clock: time.time() tracks the
# wall clock and can jump when the system clock is adjusted.
time_at_previous_example = time.perf_counter()

while len(collected_states) < num_examples:
    # The very first game starts from the empty board; every later game restarts
    # from a randomly chosen, previously recorded position.
    if collected_states:
        state = random.choice(states)
    else:
        state = DotsAndBoxesGameState(rows, cols)
    agent.root = agent.state_dict[state]

    # Play the game to its end. This inner loop does not re-check num_examples,
    # so the game in progress always finishes and the final count may exceed it.
    while not state.is_terminal():
        action = agent()
        if state not in collected_states:
            collected_states.add(state)
            # Training target: (policy, value) read from the agent's root node
            # after the agent has chosen its action.
            target = (
                agent.full_policy(agent.root),
                agent.root.value.mean_value # type: ignore
            )
            data = {
                'legal_actions': state.get_legal_actions()
            }
            states.append(state)
            examples.append(TrainingExample(state, target, data))
            # Read the clock once so no time is lost between measuring the
            # interval and resetting the reference point.
            now = time.perf_counter()
            elapsed_time = now - time_at_previous_example
            time_at_previous_example = now
            print(f"Found state {len(collected_states)}:\n{state}\nTarget policy: {target[0]}\nTarget value: {target[1]}\nData: {data}\nTime since last example: {elapsed_time:.2f} seconds\n")
        # Advance both the game state and the agent's root by the chosen action.
        state = state.apply_action(action)
        agent.update_root([action])

Found state 1:
+ . + . + . + . + . +
.   .   .   .   .   .
+ . + . + . + . + . +
.   .   .   .   .   .
+ . + . + . + . + . +
.   .   .   .   .   .
+ . + . + . + . + . +
.   .   .   .   .   .
+ . + . + . + . + . +
.   .   .   .   .   .
+ . + . + . + . + . +

 Player 1 to move.
Target policy: {(0, 1): 0.1, (0, 3): 0.1, (0, 5): 0.1, (0, 7): 0.1, (0, 9): 0.1, (1, 0): 0.1, (1, 2): 0.1, (1, 4): 0.1, (1, 6): 0.1, (1, 8): 0.1, (1, 10): 0.0, (2, 1): 0.0, (2, 3): 0.0, (2, 5): 0.0, (2, 7): 0.0, (2, 9): 0.0, (3, 0): 0.0, (3, 2): 0.0, (3, 4): 0.0, (3, 6): 0.0, (3, 8): 0.0, (3, 10): 0.0, (4, 1): 0.0, (4, 3): 0.0, (4, 5): 0.0, (4, 7): 0.0, (4, 9): 0.0, (5, 0): 0.0, (5, 2): 0.0, (5, 4): 0.0, (5, 6): 0.0, (5, 8): 0.0, (5, 10): 0.0, (6, 1): 0.0, (6, 3): 0.0, (6, 5): 0.0, (6, 7): 0.0, (6, 9): 0.0, (7, 0): 0.0, (7, 2): 0.0, (7, 4): 0.0, (7, 6): 0.0, (7, 8): 0.0, (7, 10): 0.0, (8, 1): 0.0, (8, 3): 0.0, (8, 5): 0.0, (8, 7): 0.0, (8, 9): 0.0, (9, 0): 0.0, (9, 2): 0.0, (9, 4): 0.0, (9, 6): 0.0, (9, 8): 0.0, (

In [4]:
# Encode examples

from applications.dots_and_boxes.encoder import DABSimpleTensorMapping
import torch

# Encoder that maps game states / training examples to tensors, targeting the CPU.
tensor_mapping = DABSimpleTensorMapping()
device = torch.device("cpu")

# NOTE: `states` is rebound here from the list built during self-play to the
# encoder's output, so running this cell a second time feeds that output back
# into encode_states. Re-run the self-play cell first if you need to repeat it.
states = tensor_mapping.encode_states(states, device)

# encode_examples returns a pair, unpacked as (targets, data); `data` likewise
# replaces the per-example dict left over from the self-play loop.
encoded_examples = tensor_mapping.encode_examples(examples, device)
targets, data = encoded_examples




In [5]:
# Add examples to buffer and save

from core import ReplayBuffer

# Buffer constructed with the number of collected examples (presumably its
# capacity — confirm against ReplayBuffer), then filled with the encoded tensors.
buffer = ReplayBuffer(len(examples))
buffer.extend(states, targets, data)

# File name records the board size and the encoder class used.
artifact_name = f'dots_and_boxes_{rows}x{cols}_{tensor_mapping.__class__.__name__}_mcts'
path = os.path.join('applications', 'dots_and_boxes', 'training_data', f'{artifact_name}.pkl')

# Ensure the target directory exists, so a missing folder cannot cause the save
# to fail after the (long) self-play run.
os.makedirs(os.path.dirname(path), exist_ok=True)
buffer.save(path)
# buffer.save_to_wandb(artifact_name=artifact_name, project='AlphaZero-DotsAndBoxes')