In [1]:
# Change directory to the root of the project
import os 
os.chdir('..')
os.chdir('..')
os.chdir('..')
print(f"Working directory: {os.getcwd()}")

Working directory: /Users/eohjelle/Documents/2025-dots-and-boxes/dots-and-boxes


In [2]:
from applications.dots_and_boxes.NNmodels.transformer import TransformerInitParams
from applications.dots_and_boxes.encoder import DABMiddleGroundTensorMapping
from core.implementations import AlphaZeroConfig
import torch

# Initialize parameters

## Model parameters
model_type = 'transformer'
model_params: TransformerInitParams = {
    'num_rows': 5,
    'num_cols': 5,
    'embed_dim': 128,
    'feedforward_dim': 512,
    'num_heads': 4,
    'attention_layers': 4
}
device = torch.device('mps')
model_name = 'dots_and_boxes_transformer'
tensor_mapping = DABMiddleGroundTensorMapping

## Initialize new model
# load_model = None
# load_model_params = {}
load_model = 'from_wandb'
load_model_params = {
    'project': 'AlphaZero-DotsAndBoxes',
    'model_name': model_name,
    'model_version': 'v16'
}

## Optimizer parameters
optimizer_type = 'adam'
optimizer_params = {
    'lr': 1e-2,
    'betas': (0.9, 0.999),
    'eps': 1e-8,
    'weight_decay': 1e-4,
    'amsgrad': False
}

## Learning scheduler parameters
lr_scheduler_type = 'plateau'
lr_scheduler_params = {
    'factor': 0.5,
    'patience': 100,
    'cooldown': 100,
    'min_lr': 1e-6
}

## Training parameters
# training_method = 'supervised'
# trainer_params = {}
# training_params = {
#     'epochs': 100,
#     'batch_size': 256,
#     'eval_freq': 25,
#     'checkpoint_freq': 50,
#     'mask_illegal_moves': False,
#     'mask_value': -20.0, # Doesn't matter when mask_illegal_moves is False
#     'checkpoint_dir': 'checkpoints',
#     'start_at': 1
# }
training_method = 'self_play'
alphazero_config = AlphaZeroConfig(
    exploration_constant=1.0,
    dirichlet_alpha=0.3,
    dirichlet_epsilon=0.25,
    temperature=1.0
)
alphazero_eval_config = AlphaZeroConfig(
    exploration_constant=1.0,
    dirichlet_alpha=0.0,
    dirichlet_epsilon=0.0,
    temperature=0.0
)
trainer_params = {
    'value_softness': 1.0
}
training_params = {
    'num_iterations': 500,
    'games_per_iteration': 10,
    'batch_size': 256,
    'steps_per_iteration': 100,
    'num_simulations': 100,
    'checkpoint_frequency': 20,
    'tree_search_params': alphazero_config,
    'tree_search_eval_params': alphazero_eval_config,
    'start_iteration': 150
}

## Load replay buffer from wandb
load_replay_buffer = 'from_wandb'
load_replay_buffer_params = {
    'project': 'AlphaZero-DotsAndBoxes',
    'artifact_name': f'{model_name}_replay_buffer',
    'artifact_version': 'v5'
}
# load_replay_buffer = None
# load_replay_buffer_params = {
#     'max_size': 10**4
# }
# load_replay_buffer = 'from_file'
# buffer_type = 'mcts' # used in path name below
# load_replay_buffer_params = {
#     'path': f'applications/dots_and_boxes/training_data/dots_and_boxes_{model_params["num_rows"]}x{model_params["num_cols"]}_{tensor_mapping.__name__}_{buffer_type}.pkl',
#     'device': device
# }
# load_replay_buffer = 'from_wandb'
# load_replay_buffer_params = {
#     'project': 'AlphaZero-DotsAndBoxes',
#     'artifact_name': f'dots_and_boxes_{model_params["num_rows"]}x{model_params["num_cols"]}_SimpleTensorMapping_minimax',
#     'artifact_version': 'latest'
# }


In [3]:
# Initialize wandb run
import wandb

run_name = 'Self-play Transformer'
notes = 'Transformer model with middle ground encoding training with self-play on 5 x 5 board.'

config = {
    'model_type': model_type,
    'model_params': model_params,
    'optimizer_type': optimizer_type,
    'optimizer_params': optimizer_params,
    'lr_scheduler_type': lr_scheduler_type,
    'lr_scheduler_params': lr_scheduler_params,
    'training_method': training_method,
    'trainer_params': trainer_params,
    'training_params': training_params
}

# run = wandb.init(
#     project='AlphaZero-DotsAndBoxes',
#     name=run_name,
#     config=config,
#     notes=notes,
#     group=f'{training_method} training on {model_params["num_rows"]}x{model_params["num_cols"]} board'
# )
# run = None
run_id='4qihqmj6'
run = wandb.init(
    project='AlphaZero-DotsAndBoxes',
    id=run_id,
)

[34m[1mwandb[0m: Currently logged in as: [33meohjelle[0m ([33meigenway[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [4]:
# Perform training

from applications.dots_and_boxes.train import train

model_interface = train(
    num_rows = model_params["num_rows"],
    num_cols = model_params["num_cols"],
    model_type=model_type,
    model_params=model_params,
    device=device,
    model_name=model_name,
    optimizer_type=optimizer_type,
    optimizer_params=optimizer_params,
    lr_scheduler_type=lr_scheduler_type,
    lr_scheduler_params=lr_scheduler_params,
    training_method=training_method,
    trainer_params=trainer_params,
    training_params=training_params,
    load_model=load_model,
    load_model_params=load_model_params,
    load_replay_buffer=load_replay_buffer,
    load_replay_buffer_params=load_replay_buffer_params,
    wandb_run=run
)

[34m[1mwandb[0m:   1 of 1 files downloaded.  
  checkpoint = torch.load(path, map_location=device)
[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
[34m[1mwandb[0m: Downloading large artifact dots_and_boxes_transformer_replay_buffer:v5, 81.26MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.2
  checkpoint = torch.load(path, map_location=device)


Could not get best score for dots_and_boxes_transformer from wandb: HTTP 400: artifacts must be specified as 'collection:alias'
Using initial best score of -inf.

Iteration 151/650
Self-play phase...
Playing game 10/10
Generated 610 new positions
Training phase...
Current learning rate: 0.010000

Iteration 151 summary:
Average loss: 3.7431
Average policy_loss: 2.6480
Average value_loss: 1.0951
Replay buffer size: 73810
Time taken: 945.3s

Iteration 152/650
Self-play phase...
Playing game 10/10
Generated 610 new positions
Training phase...
Current learning rate: 0.010000

Iteration 152 summary:
Average loss: 3.7357
Average policy_loss: 2.6405
Average value_loss: 1.0953
Replay buffer size: 74420
Time taken: 986.2s

Iteration 153/650
Self-play phase...
Playing game 10/10
Generated 610 new positions
Training phase...
Current learning rate: 0.010000

Iteration 153 summary:
Average loss: 3.7303
Average policy_loss: 2.6316
Average value_loss: 1.0987
Replay buffer size: 75030
Time taken: 990.2

KeyboardInterrupt: 

In [None]:
run.finish()

AttributeError: 'NoneType' object has no attribute 'finish'

In [6]:
def print_model_parameters(model):
    """
    Print the total number of parameters in a PyTorch model,
    with a breakdown of trainable vs non-trainable parameters.
    
    Args:
        model: PyTorch model
    """
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    non_trainable_params = total_params - trainable_params
    
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,} ({trainable_params/total_params:.2%})")
    print(f"Non-trainable parameters: {non_trainable_params:,} ({non_trainable_params/total_params:.2%})")
    
    # Optional: Print parameters by layer
    print("\nParameters by layer:")
    for name, param in model.named_parameters():
        print(f"{name}: {param.numel():,} parameters")

# Example usage
print_model_parameters(model_interface.model)

Total parameters: 816,130
Trainable parameters: 816,130 (100.00%)
Non-trainable parameters: 0 (0.00%)

Parameters by layer:
pos_embedding: 10,880 parameters
input_embedding.weight: 896 parameters
transformer_blocks.0.self_attn.in_proj_weight: 49,152 parameters
transformer_blocks.0.self_attn.in_proj_bias: 384 parameters
transformer_blocks.0.self_attn.out_proj.weight: 16,384 parameters
transformer_blocks.0.self_attn.out_proj.bias: 128 parameters
transformer_blocks.0.linear1.weight: 65,536 parameters
transformer_blocks.0.linear1.bias: 512 parameters
transformer_blocks.0.linear2.weight: 65,536 parameters
transformer_blocks.0.linear2.bias: 128 parameters
transformer_blocks.0.norm1.weight: 128 parameters
transformer_blocks.0.norm1.bias: 128 parameters
transformer_blocks.0.norm2.weight: 128 parameters
transformer_blocks.0.norm2.bias: 128 parameters
transformer_blocks.1.self_attn.in_proj_weight: 49,152 parameters
transformer_blocks.1.self_attn.in_proj_bias: 384 parameters
transformer_blocks.1.