In [1]:
# Change directory to the root of the project
import os

# `os.chdir('..')` is relative to the *current* directory, so re-running this cell in
# the same kernel would climb one more level on every execution. Remember where the
# kernel started and always resolve the project root from that fixed location, which
# makes the cell safe to re-run. A kernel restart clears the remembered directory.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, ".."))
print(f"Working directory: {os.getcwd()}")

Working directory: /Users/eohjelle/Documents/2025-dots-and-boxes/dots-and-boxes


In this notebook we run another wandb sweep for a transformer-based model, using training data generated with a Minimax model.

In [2]:
# Load the training data

from core.data_structures import ReplayBuffer
import random

buffer = ReplayBuffer.from_file('applications/tic_tac_toe/training_data/transformer.pkl')

print(f"Buffer size: {buffer.states.shape[0]}")

# Sanity check: pick one random entry and show its state together with every
# target and every auxiliary data field stored for it.
(i,) = random.sample(range(len(buffer.states)), 1)
print(f"Buffer state {i}: {buffer.states[i]}")
for key in buffer.targets.keys():
    print(f"Buffer target {key} {i}: {buffer.targets[key][i]}")
for key in buffer.data.keys():
    print(f"Buffer data {key} {i}: {buffer.data[key][i]}")

Buffer size: 5478
Buffer state 1912: tensor([2, 1, 1, 2, 0, 1, 0, 0, 2], device='mps:0')
Buffer target policy 1912: tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.3333, 0.0000, 0.3333, 0.3333, 0.0000],
       device='mps:0')
Buffer target value 1912: tensor([-1.], device='mps:0')
Buffer data legal_actions 1912: tensor([0., 0., 0., 0., 1., 0., 1., 1., 0.], device='mps:0')


  checkpoint = torch.load(path, map_location=device)


In [3]:
from applications.tic_tac_toe.game_state import TicTacToeState
from core.implementations.Minimax import Minimax

# Create a minimax agent and expand the game tree; this will be used for evaluation later on.
state = TicTacToeState()
minimax_agent = Minimax(state)
# Keep a reference to the agent's initial root so that `minimax_agent_factory`
# can reset the agent to it.
minimax_agent_root = minimax_agent.root
# NOTE(review): calling the agent is presumably what expands the game tree below
# the root — confirm against core.implementations.Minimax.
minimax_agent()

def minimax_agent_factory() -> Minimax:
    """
    Hand out the shared minimax agent, reset to the root of the game tree.

    No new agent is constructed: the module-level `minimax_agent` is reused on
    every call, and only its `root` attribute is pointed back at
    `minimax_agent_root` before it is returned.
    """
    shared_agent = minimax_agent
    shared_agent.root = minimax_agent_root
    return shared_agent


In [4]:
# Define sweep config

sweep_config = dict(
    # Bayesian search that tries to maximize the score against the minimax agent.
    method='bayes',
    metric=dict(name='minimax_score', goal='maximize'),
    parameters=dict(
        # Optimizer parameters (the only ones actually searched over)
        learning_rate=dict(distribution='uniform', min=0.0001, max=0.01),
        weight_decay=dict(distribution='uniform', min=0.001, max=0.01),

        # Model parameters (each pinned to a single value)
        attention_layers=dict(values=[2]),
        embed_dim=dict(values=[64]),
        feedforward_dim=dict(values=[256]),
        dropout=dict(values=[0.0]),
        norm_first=dict(values=[True]),
        activation=dict(values=['relu']),

        # Training parameters (each pinned to a single value)
        epochs=dict(values=[100]),
        batch_size=dict(values=[256]),
        mask_illegal_moves=dict(values=[False]),
        mask_value=dict(values=[-10.0]),
    ),
)

In [5]:
# Define simple training loop

import wandb
from applications.tic_tac_toe.game_state import TicTacToeState
from applications.tic_tac_toe.transformer_model import TicTacToeTransformerInterface
import torch
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader, TensorDataset, random_split
from core.benchmark import benchmark
from core.implementations.RandomAgent import RandomAgent
from core.implementations.Minimax import Minimax
from core.implementations.AlphaZero import AlphaZeroModelAgent

import os

def _policy_value_losses(model, batch, mask_illegal_moves, mask_value):
    """
    Run one forward pass and compute the policy and value losses for a batch.

    Args:
        model: Callable network returning a mapping with 'policy' (logits) and
            'value' entries.
        batch: `(states, policy_targets, value_targets, legal_actions)` as yielded
            by the data loaders built in `sweep_agent`.
        mask_illegal_moves: When true, the logits of illegal moves are replaced by
            `mask_value` before the policy loss is computed.
        mask_value: Replacement logit used for illegal moves.

    Returns:
        Tuple `(policy_loss, value_loss)` of scalar tensors (mean-reduced losses).
    """
    states_batch, policy_targets_batch, value_targets_batch, legal_actions_batch = batch

    outputs = model(states_batch)
    policy_logits = outputs['policy']
    value_pred = outputs['value']

    if mask_illegal_moves:
        # `legal_actions_batch` is used as a 0/1 mask: logits are kept where it is 1
        # and replaced by `mask_value` where it is 0.
        policy_logits = policy_logits * legal_actions_batch + (1 - legal_actions_batch) * mask_value

    # The policy targets are passed to cross_entropy as float tensors, i.e. as
    # per-move probabilities (soft targets) rather than class indices.
    policy_loss = F.cross_entropy(policy_logits, policy_targets_batch)
    value_loss = F.mse_loss(value_pred, value_targets_batch)
    return policy_loss, value_loss


def sweep_agent(split_seed: int = 0):
    """
    Train and evaluate one transformer model for a single W&B sweep run.

    The hyperparameters are read from the run's config. The model is trained on a
    90/10 train/validation split of the module-level `buffer`, per-epoch losses are
    logged to W&B, and the trained model is finally benchmarked against a random
    agent and the minimax agent. The resulting win/draw/loss rates and scores
    (win rate minus loss rate) are logged as well; `minimax_score` is the metric
    the sweep optimizes.

    Args:
        split_seed: Seed for the train/validation split. Using the same seed in
            every run means all sweep runs are validated on the same held-out
            positions, so their validation losses are comparable.
    """
    with wandb.init(project="AlphaZero-TicTacToe") as run:
        config = run.config
        device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
        model_interface = TicTacToeTransformerInterface(
            device=device,
            attention_layers=config.attention_layers,
            embed_dim=config.embed_dim,
            num_heads=4,
            feedforward_dim=config.feedforward_dim,
            dropout=config.dropout,
            norm_first=config.norm_first,
            activation=config.activation
        )
        model = model_interface.model

        # Create optimizer
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=config.learning_rate,
            weight_decay=config.weight_decay
        )

        # Create dataset and split into train/val with a fixed seed, so the split
        # does not change from one sweep run to the next.
        dataset = TensorDataset(
            buffer.states,
            buffer.targets['policy'],
            buffer.targets['value'],
            buffer.data['legal_actions'],
        )
        train_size = int(0.9 * len(dataset))
        val_size = len(dataset) - train_size
        split_generator = torch.Generator().manual_seed(split_seed)
        train_dataset, val_dataset = random_split(
            dataset, [train_size, val_size], generator=split_generator
        )

        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)

        # Training loop. No checkpoints are written; only metrics are logged to W&B.
        for epoch in range(config.epochs):
            # Training phase
            model.train()
            train_losses = []
            policy_losses = []
            value_losses = []

            for batch in train_loader:
                optimizer.zero_grad()

                policy_loss, value_loss = _policy_value_losses(
                    model, batch, config.mask_illegal_moves, config.mask_value
                )
                total_loss = policy_loss + value_loss

                # Backward pass and optimization
                total_loss.backward()
                optimizer.step()

                # Track metrics
                train_losses.append(total_loss.item())
                policy_losses.append(policy_loss.item())
                value_losses.append(value_loss.item())

            # Validation phase
            model.eval()
            val_losses = []
            val_policy_losses = []
            val_value_losses = []

            with torch.no_grad():
                for batch in val_loader:
                    policy_loss, value_loss = _policy_value_losses(
                        model, batch, config.mask_illegal_moves, config.mask_value
                    )
                    total_loss = policy_loss + value_loss

                    # Track metrics
                    val_losses.append(total_loss.item())
                    val_policy_losses.append(policy_loss.item())
                    val_value_losses.append(value_loss.item())

            # Calculate average metrics (unweighted mean over batches)
            avg_train_loss = np.mean(train_losses)
            avg_val_loss = np.mean(val_losses)

            # Log metrics to wandb
            wandb.log({
                "epoch": epoch,
                "train_loss": avg_train_loss,
                "train_policy_loss": np.mean(policy_losses),
                "train_value_loss": np.mean(value_losses),
                "val_loss": avg_val_loss,
                "val_policy_loss": np.mean(val_policy_losses),
                "val_value_loss": np.mean(val_value_losses),
            })

            print(f"Epoch {epoch+1}/{config.epochs}, "
                  f"Train Loss: {avg_train_loss:.4f}, "
                  f"Val Loss: {avg_val_loss:.4f}")

        # Make sure the model is in eval mode for the benchmark even if the loop
        # above did not run (e.g. epochs == 0).
        model.eval()

        # Evaluate against agents
        stats = benchmark(
            create_agent=lambda state: AlphaZeroModelAgent(
                initial_state=state,
                model=model_interface
            ),
            create_opponents={
                'random': lambda state: RandomAgent(state),
                'minimax': lambda state: minimax_agent_factory()
            },
            initial_state=lambda: TicTacToeState(),
            num_games=100
        )
        wandb.log({
            'random_win_rate': stats['random']['win_rate'],
            'minimax_win_rate': stats['minimax']['win_rate'],
            'random_draw_rate': stats['random']['draw_rate'],
            'minimax_draw_rate': stats['minimax']['draw_rate'],
            'random_loss_rate': stats['random']['loss_rate'],
            'minimax_loss_rate': stats['minimax']['loss_rate'],
            # Score = win rate - loss rate
            'random_score': stats['random']['win_rate'] - stats['random']['loss_rate'],
            'minimax_score': stats['minimax']['win_rate'] - stats['minimax']['loss_rate']
        })


        

In [6]:
# Start the sweep
# Register the sweep described by `sweep_config` under the W&B project and keep its ID.
sweep_id = wandb.sweep(sweep_config, project="AlphaZero-TicTacToe")
print(f"Sweep ID: {sweep_id}")
# Execute `sweep_agent` for sweep runs; `count=20` caps the number of runs at 20.
wandb.agent(sweep_id, function=sweep_agent, count=20) 
# Finish any W&B run that is still active once the agent returns.
wandb.finish()

Create sweep with ID: mtjttrrq
Sweep URL: https://wandb.ai/eigenway/AlphaZero-TicTacToe/sweeps/mtjttrrq
Sweep ID: mtjttrrq


[34m[1mwandb[0m: Agent Starting Run: 4em1d178 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0019387894391755464
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.009399579829803266
[34m[1mwandb[0m: Currently logged in as: [33meohjelle[0m ([33meigenway[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Epoch 1/100, Train Loss: 8.7050, Val Loss: 4.3128
Epoch 2/100, Train Loss: 2.9113, Val Loss: 2.8654
Epoch 3/100, Train Loss: 2.4253, Val Loss: 2.6549
Epoch 4/100, Train Loss: 2.3593, Val Loss: 2.5841
Epoch 5/100, Train Loss: 2.3192, Val Loss: 2.6640
Epoch 6/100, Train Loss: 2.3228, Val Loss: 2.6371
Epoch 7/100, Train Loss: 2.3089, Val Loss: 2.6646
Epoch 8/100, Train Loss: 2.3289, Val Loss: 2.6066
Epoch 9/100, Train Loss: 2.3242, Val Loss: 2.5992
Epoch 10/100, Train Loss: 2.3192, Val Loss: 2.6611
Epoch 11/100, Train Loss: 2.2977, Val Loss: 2.5752
Epoch 12/100, Train Loss: 2.2968, Val Loss: 2.5402
Epoch 13/100, Train Loss: 2.2599, Val Loss: 2.5264
Epoch 14/100, Train Loss: 2.2109, Val Loss: 2.4925
Epoch 15/100, Train Loss: 2.2223, Val Loss: 2.4424
Epoch 16/100, Train Loss: 2.1704, Val Loss: 2.4803
Epoch 17/100, Train Loss: 2.1345, Val Loss: 2.4614
Epoch 18/100, Train Loss: 2.1431, Val Loss: 2.4598
Epoch 19/100, Train Loss: 2.1016, Val Loss: 2.4295
Epoch 20/100, Train Loss: 2.0944, Val Lo

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇█
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.35
minimax_loss_rate,0.65
minimax_score,-0.65
minimax_win_rate,0.0
random_draw_rate,0.09
random_loss_rate,0.16
random_score,0.59
random_win_rate,0.75
train_loss,0.79958


[34m[1mwandb[0m: Agent Starting Run: 0jasv9z6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008479717755861172
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.007743711984707411


Epoch 1/100, Train Loss: 12.0543, Val Loss: 4.0199
Epoch 2/100, Train Loss: 3.3052, Val Loss: 2.7768
Epoch 3/100, Train Loss: 2.7897, Val Loss: 2.6171
Epoch 4/100, Train Loss: 2.6207, Val Loss: 2.4583
Epoch 5/100, Train Loss: 2.5184, Val Loss: 2.3882
Epoch 6/100, Train Loss: 2.4579, Val Loss: 2.3770
Epoch 7/100, Train Loss: 2.4077, Val Loss: 2.3385
Epoch 8/100, Train Loss: 2.3861, Val Loss: 2.3382
Epoch 9/100, Train Loss: 2.3829, Val Loss: 2.2775
Epoch 10/100, Train Loss: 2.3494, Val Loss: 2.2319
Epoch 11/100, Train Loss: 2.2797, Val Loss: 2.2185
Epoch 12/100, Train Loss: 2.2852, Val Loss: 2.1111
Epoch 13/100, Train Loss: 1.9948, Val Loss: 2.0472
Epoch 14/100, Train Loss: 1.7837, Val Loss: 1.6750
Epoch 15/100, Train Loss: 2.1760, Val Loss: 2.1548
Epoch 16/100, Train Loss: 2.1678, Val Loss: 2.0932
Epoch 17/100, Train Loss: 2.1216, Val Loss: 2.0526
Epoch 18/100, Train Loss: 2.1058, Val Loss: 2.1000
Epoch 19/100, Train Loss: 2.1320, Val Loss: 2.0603
Epoch 20/100, Train Loss: 2.1050, Val L

0,1
epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▆▆▅▅▄▅▅▅▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.32
minimax_loss_rate,0.68
minimax_score,-0.68
minimax_win_rate,0.0
random_draw_rate,0.12
random_loss_rate,0.18
random_score,0.52
random_win_rate,0.7
train_loss,0.79927


[34m[1mwandb[0m: Agent Starting Run: npandt8s with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0019337221217427673
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0023144853627454502


Epoch 1/100, Train Loss: 10.1558, Val Loss: 3.7006
Epoch 2/100, Train Loss: 2.9899, Val Loss: 2.6971
Epoch 3/100, Train Loss: 2.4544, Val Loss: 2.4705
Epoch 4/100, Train Loss: 2.3589, Val Loss: 2.4499
Epoch 5/100, Train Loss: 2.2968, Val Loss: 2.3615
Epoch 6/100, Train Loss: 2.2687, Val Loss: 2.3559
Epoch 7/100, Train Loss: 2.2424, Val Loss: 2.3718
Epoch 8/100, Train Loss: 2.2109, Val Loss: 2.3555
Epoch 9/100, Train Loss: 2.2107, Val Loss: 2.2821
Epoch 10/100, Train Loss: 2.1744, Val Loss: 2.2789
Epoch 11/100, Train Loss: 2.1718, Val Loss: 2.3471
Epoch 12/100, Train Loss: 2.1368, Val Loss: 2.2444
Epoch 13/100, Train Loss: 2.0981, Val Loss: 2.2427
Epoch 14/100, Train Loss: 2.0716, Val Loss: 2.2462
Epoch 15/100, Train Loss: 2.0639, Val Loss: 2.2324
Epoch 16/100, Train Loss: 2.0405, Val Loss: 2.1855
Epoch 17/100, Train Loss: 2.0467, Val Loss: 2.1608
Epoch 18/100, Train Loss: 2.0446, Val Loss: 2.2068
Epoch 19/100, Train Loss: 2.0560, Val Loss: 2.1761
Epoch 20/100, Train Loss: 2.0269, Val L

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇█
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,███▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.47
minimax_loss_rate,0.53
minimax_score,-0.53
minimax_win_rate,0.0
random_draw_rate,0.08
random_loss_rate,0.08
random_score,0.76
random_win_rate,0.84
train_loss,0.74832


[34m[1mwandb[0m: Agent Starting Run: i5dcam95 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001197666893570677
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0028508025234607044


Epoch 1/100, Train Loss: 7.0817, Val Loss: 3.2156
Epoch 2/100, Train Loss: 2.7303, Val Loss: 2.2265
Epoch 3/100, Train Loss: 2.1746, Val Loss: 2.1410
Epoch 4/100, Train Loss: 2.0775, Val Loss: 1.9873
Epoch 5/100, Train Loss: 1.9473, Val Loss: 1.8279
Epoch 6/100, Train Loss: 1.9943, Val Loss: 1.9936
Epoch 7/100, Train Loss: 1.8806, Val Loss: 1.7584
Epoch 8/100, Train Loss: 1.7088, Val Loss: 1.6965
Epoch 9/100, Train Loss: 1.6906, Val Loss: 1.8272
Epoch 10/100, Train Loss: 1.6274, Val Loss: 1.5407
Epoch 11/100, Train Loss: 1.5628, Val Loss: 1.6274
Epoch 12/100, Train Loss: 1.5466, Val Loss: 1.7846
Epoch 13/100, Train Loss: 1.5522, Val Loss: 1.3647
Epoch 14/100, Train Loss: 1.4487, Val Loss: 1.7695
Epoch 15/100, Train Loss: 1.4491, Val Loss: 1.3142
Epoch 16/100, Train Loss: 1.4514, Val Loss: 1.5397
Epoch 17/100, Train Loss: 1.4871, Val Loss: 1.8352
Epoch 18/100, Train Loss: 1.5193, Val Loss: 1.2739
Epoch 19/100, Train Loss: 1.3728, Val Loss: 1.4297
Epoch 20/100, Train Loss: 1.3405, Val Lo

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.4
minimax_loss_rate,0.6
minimax_score,-0.6
minimax_win_rate,0.0
random_draw_rate,0.11
random_loss_rate,0.08
random_score,0.73
random_win_rate,0.81
train_loss,0.73267


[34m[1mwandb[0m: Agent Starting Run: 09ps40kt with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0025219949917876065
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.004981025182155755


Epoch 1/100, Train Loss: 7.4093, Val Loss: 3.5265
Epoch 2/100, Train Loss: 2.8013, Val Loss: 2.5283
Epoch 3/100, Train Loss: 2.4088, Val Loss: 2.3753
Epoch 4/100, Train Loss: 2.3691, Val Loss: 2.3823
Epoch 5/100, Train Loss: 2.3547, Val Loss: 2.3830
Epoch 6/100, Train Loss: 2.3121, Val Loss: 2.4042
Epoch 7/100, Train Loss: 2.3284, Val Loss: 2.3706
Epoch 8/100, Train Loss: 2.3164, Val Loss: 2.2949
Epoch 9/100, Train Loss: 2.2039, Val Loss: 2.2633
Epoch 10/100, Train Loss: 2.0811, Val Loss: 2.1718
Epoch 11/100, Train Loss: 1.9875, Val Loss: 1.9642
Epoch 12/100, Train Loss: 1.9075, Val Loss: 1.9969
Epoch 13/100, Train Loss: 1.8222, Val Loss: 1.8704
Epoch 14/100, Train Loss: 1.7675, Val Loss: 1.9393
Epoch 15/100, Train Loss: 1.6223, Val Loss: 1.7092
Epoch 16/100, Train Loss: 1.5579, Val Loss: 1.7072
Epoch 17/100, Train Loss: 1.4244, Val Loss: 1.4065
Epoch 18/100, Train Loss: 1.3586, Val Loss: 1.6388
Epoch 19/100, Train Loss: 1.3421, Val Loss: 1.4532
Epoch 20/100, Train Loss: 1.2667, Val Lo

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▇▆▆▆▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.29
minimax_loss_rate,0.71
minimax_score,-0.71
minimax_win_rate,0.0
random_draw_rate,0.09
random_loss_rate,0.1
random_score,0.71
random_win_rate,0.81
train_loss,0.77979


[34m[1mwandb[0m: Agent Starting Run: mbmbcv2l with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0011949541146207964
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.002030246617248605


Epoch 1/100, Train Loss: 10.9743, Val Loss: 4.4529
Epoch 2/100, Train Loss: 3.3289, Val Loss: 2.5339
Epoch 3/100, Train Loss: 2.3177, Val Loss: 2.1909
Epoch 4/100, Train Loss: 2.0657, Val Loss: 2.0823
Epoch 5/100, Train Loss: 1.9206, Val Loss: 2.0916
Epoch 6/100, Train Loss: 1.7928, Val Loss: 1.7381
Epoch 7/100, Train Loss: 1.6650, Val Loss: 1.9347
Epoch 8/100, Train Loss: 1.7237, Val Loss: 1.6653
Epoch 9/100, Train Loss: 1.6576, Val Loss: 1.7233
Epoch 10/100, Train Loss: 1.6031, Val Loss: 1.6473
Epoch 11/100, Train Loss: 1.7029, Val Loss: 1.8305
Epoch 12/100, Train Loss: 1.7680, Val Loss: 1.8556
Epoch 13/100, Train Loss: 1.7346, Val Loss: 1.8347
Epoch 14/100, Train Loss: 1.5410, Val Loss: 1.7704
Epoch 15/100, Train Loss: 1.5953, Val Loss: 1.7132
Epoch 16/100, Train Loss: 1.4445, Val Loss: 1.7020
Epoch 17/100, Train Loss: 1.6137, Val Loss: 1.7406
Epoch 18/100, Train Loss: 1.5540, Val Loss: 1.7346
Epoch 19/100, Train Loss: 1.5407, Val Loss: 1.6894
Epoch 20/100, Train Loss: 1.5064, Val L

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇████
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▄▄▄▃▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.47
minimax_loss_rate,0.53
minimax_score,-0.53
minimax_win_rate,0.0
random_draw_rate,0.09
random_loss_rate,0.07
random_score,0.77
random_win_rate,0.84
train_loss,0.72644


[34m[1mwandb[0m: Agent Starting Run: 1hxsqg6h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006021389753876815
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0018254838812306344


Epoch 1/100, Train Loss: 10.2643, Val Loss: 4.7389
Epoch 2/100, Train Loss: 3.5303, Val Loss: 2.7488
Epoch 3/100, Train Loss: 2.4375, Val Loss: 2.2904
Epoch 4/100, Train Loss: 2.1836, Val Loss: 2.1669
Epoch 5/100, Train Loss: 2.0872, Val Loss: 2.1769
Epoch 6/100, Train Loss: 2.0230, Val Loss: 2.1279
Epoch 7/100, Train Loss: 1.9786, Val Loss: 2.0637
Epoch 8/100, Train Loss: 1.9416, Val Loss: 2.0246
Epoch 9/100, Train Loss: 1.8408, Val Loss: 1.9051
Epoch 10/100, Train Loss: 1.7638, Val Loss: 1.6142
Epoch 11/100, Train Loss: 1.6408, Val Loss: 1.6165
Epoch 12/100, Train Loss: 1.5921, Val Loss: 1.6184
Epoch 13/100, Train Loss: 1.5603, Val Loss: 1.6401
Epoch 14/100, Train Loss: 1.5970, Val Loss: 1.6495
Epoch 15/100, Train Loss: 1.5738, Val Loss: 1.6200
Epoch 16/100, Train Loss: 1.4682, Val Loss: 1.5988
Epoch 17/100, Train Loss: 1.4938, Val Loss: 1.5599
Epoch 18/100, Train Loss: 1.4354, Val Loss: 1.4084
Epoch 19/100, Train Loss: 1.5077, Val Loss: 1.5584
Epoch 20/100, Train Loss: 1.4412, Val L

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇███
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.46
minimax_loss_rate,0.54
minimax_score,-0.54
minimax_win_rate,0.0
random_draw_rate,0.09
random_loss_rate,0.1
random_score,0.71
random_win_rate,0.81
train_loss,0.71506


[34m[1mwandb[0m: Agent Starting Run: v4i391rv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0011455660395738663
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0013250564453652408


Epoch 1/100, Train Loss: 7.2512, Val Loss: 3.3992
Epoch 2/100, Train Loss: 2.7990, Val Loss: 2.6207
Epoch 3/100, Train Loss: 2.3741, Val Loss: 2.4290
Epoch 4/100, Train Loss: 2.3120, Val Loss: 2.4573
Epoch 5/100, Train Loss: 2.3480, Val Loss: 2.3741
Epoch 6/100, Train Loss: 2.2630, Val Loss: 2.3720
Epoch 7/100, Train Loss: 2.2174, Val Loss: 2.2896
Epoch 8/100, Train Loss: 2.1558, Val Loss: 2.3370
Epoch 9/100, Train Loss: 2.1646, Val Loss: 2.2289
Epoch 10/100, Train Loss: 2.1079, Val Loss: 2.2678
Epoch 11/100, Train Loss: 2.1397, Val Loss: 2.2354
Epoch 12/100, Train Loss: 2.0941, Val Loss: 2.1991
Epoch 13/100, Train Loss: 2.0668, Val Loss: 2.1859
Epoch 14/100, Train Loss: 2.0802, Val Loss: 2.1714
Epoch 15/100, Train Loss: 2.0564, Val Loss: 2.1509
Epoch 16/100, Train Loss: 2.0445, Val Loss: 2.1891
Epoch 17/100, Train Loss: 2.0786, Val Loss: 2.1625
Epoch 18/100, Train Loss: 2.0573, Val Loss: 2.1255
Epoch 19/100, Train Loss: 2.0484, Val Loss: 2.1314
Epoch 20/100, Train Loss: 2.0100, Val Lo

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,██████▇▇▇▇█▇▇▇▇▇▇▇▇▇▅▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.5
minimax_loss_rate,0.5
minimax_score,-0.5
minimax_win_rate,0.0
random_draw_rate,0.16
random_loss_rate,0.1
random_score,0.64
random_win_rate,0.74
train_loss,0.69281


[34m[1mwandb[0m: Agent Starting Run: ey4fxj8w with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.003655314929122717
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0011689193466952585


Epoch 1/100, Train Loss: 8.1445, Val Loss: 3.4916
Epoch 2/100, Train Loss: 2.7767, Val Loss: 2.5654
Epoch 3/100, Train Loss: 2.4426, Val Loss: 2.4001
Epoch 4/100, Train Loss: 2.3574, Val Loss: 2.3377
Epoch 5/100, Train Loss: 2.2827, Val Loss: 2.3505
Epoch 6/100, Train Loss: 2.2526, Val Loss: 2.2694
Epoch 7/100, Train Loss: 2.1865, Val Loss: 2.2369
Epoch 8/100, Train Loss: 2.1617, Val Loss: 2.1561
Epoch 9/100, Train Loss: 2.1087, Val Loss: 2.1340
Epoch 10/100, Train Loss: 2.0781, Val Loss: 2.1577
Epoch 11/100, Train Loss: 2.0806, Val Loss: 2.1866
Epoch 12/100, Train Loss: 2.0751, Val Loss: 2.1627
Epoch 13/100, Train Loss: 2.0527, Val Loss: 2.1348
Epoch 14/100, Train Loss: 2.0460, Val Loss: 2.1167
Epoch 15/100, Train Loss: 2.0350, Val Loss: 2.1123
Epoch 16/100, Train Loss: 2.0456, Val Loss: 2.1157
Epoch 17/100, Train Loss: 2.0164, Val Loss: 2.1307
Epoch 18/100, Train Loss: 2.0586, Val Loss: 2.1062
Epoch 19/100, Train Loss: 2.0286, Val Loss: 2.1178
Epoch 20/100, Train Loss: 2.0151, Val Lo

0,1
epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇███
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,██▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▄▄▄▂▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.52
minimax_loss_rate,0.48
minimax_score,-0.48
minimax_win_rate,0.0
random_draw_rate,0.06
random_loss_rate,0.06
random_score,0.82
random_win_rate,0.88
train_loss,0.69585


[34m[1mwandb[0m: Agent Starting Run: wpa5v17z with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.003090691855904909
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0012229049274340154


Epoch 1/100, Train Loss: 9.8758, Val Loss: 3.7853
Epoch 2/100, Train Loss: 2.9373, Val Loss: 2.4731
Epoch 3/100, Train Loss: 2.5050, Val Loss: 2.3610
Epoch 4/100, Train Loss: 2.4029, Val Loss: 2.3285
Epoch 5/100, Train Loss: 2.3432, Val Loss: 2.2813
Epoch 6/100, Train Loss: 2.3230, Val Loss: 2.3061
Epoch 7/100, Train Loss: 2.3111, Val Loss: 2.1943
Epoch 8/100, Train Loss: 2.2266, Val Loss: 2.1294
Epoch 9/100, Train Loss: 2.2011, Val Loss: 2.0807
Epoch 10/100, Train Loss: 2.1655, Val Loss: 2.0221
Epoch 11/100, Train Loss: 2.1039, Val Loss: 2.0259
Epoch 12/100, Train Loss: 2.0968, Val Loss: 1.9988
Epoch 13/100, Train Loss: 2.0751, Val Loss: 1.9674
Epoch 14/100, Train Loss: 2.0598, Val Loss: 1.9476
Epoch 15/100, Train Loss: 2.0445, Val Loss: 1.9665
Epoch 16/100, Train Loss: 2.0313, Val Loss: 1.9244
Epoch 17/100, Train Loss: 2.0638, Val Loss: 1.9448
Epoch 18/100, Train Loss: 2.0616, Val Loss: 1.9537
Epoch 19/100, Train Loss: 2.0530, Val Loss: 1.9317
Epoch 20/100, Train Loss: 2.0473, Val Lo

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇████
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▇▆▆▆▆▆▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.51
minimax_loss_rate,0.49
minimax_score,-0.49
minimax_win_rate,0.0
random_draw_rate,0.1
random_loss_rate,0.04
random_score,0.82
random_win_rate,0.86
train_loss,0.6686


[34m[1mwandb[0m: Agent Starting Run: 2x2qfqbp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0022764431766465567
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0010462407271645015


Epoch 1/100, Train Loss: 9.3068, Val Loss: 4.9128
Epoch 2/100, Train Loss: 3.7775, Val Loss: 3.5923
Epoch 3/100, Train Loss: 3.3322, Val Loss: 3.3715
Epoch 4/100, Train Loss: 3.2371, Val Loss: 3.3109
Epoch 5/100, Train Loss: 3.1886, Val Loss: 3.2897
Epoch 6/100, Train Loss: 3.1380, Val Loss: 3.2146
Epoch 7/100, Train Loss: 3.0681, Val Loss: 3.2009
Epoch 8/100, Train Loss: 3.0507, Val Loss: 3.1691
Epoch 9/100, Train Loss: 2.9991, Val Loss: 3.1387
Epoch 10/100, Train Loss: 3.0036, Val Loss: 3.1224
Epoch 11/100, Train Loss: 2.9776, Val Loss: 3.1172
Epoch 12/100, Train Loss: 2.9659, Val Loss: 3.1447
Epoch 13/100, Train Loss: 2.9803, Val Loss: 3.1321
Epoch 14/100, Train Loss: 2.9678, Val Loss: 3.1005
Epoch 15/100, Train Loss: 2.9670, Val Loss: 3.1063
Epoch 16/100, Train Loss: 2.9421, Val Loss: 3.2132
Epoch 17/100, Train Loss: 2.9646, Val Loss: 3.1212
Epoch 18/100, Train Loss: 2.9312, Val Loss: 3.1539
Epoch 19/100, Train Loss: 2.9739, Val Loss: 3.1115
Epoch 20/100, Train Loss: 2.9296, Val Lo

0,1
epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇█
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,███▇▇▇▇▇▇▇▇▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▂▁

0,1
epoch,99.0
minimax_draw_rate,0.39
minimax_loss_rate,0.61
minimax_score,-0.61
minimax_win_rate,0.0
random_draw_rate,0.11
random_loss_rate,0.07
random_score,0.75
random_win_rate,0.82
train_loss,0.99494


[34m[1mwandb[0m: Agent Starting Run: tkkvq3rd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0016617797385763788
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.008053828158009702


Epoch 1/100, Train Loss: 8.2678, Val Loss: 3.4867
Epoch 2/100, Train Loss: 2.7961, Val Loss: 2.3456
Epoch 3/100, Train Loss: 2.4067, Val Loss: 2.3178
Epoch 4/100, Train Loss: 2.3586, Val Loss: 2.1655
Epoch 5/100, Train Loss: 2.3453, Val Loss: 2.1649
Epoch 6/100, Train Loss: 2.2941, Val Loss: 2.1738
Epoch 7/100, Train Loss: 2.2961, Val Loss: 2.1211
Epoch 8/100, Train Loss: 2.2253, Val Loss: 2.0696
Epoch 9/100, Train Loss: 2.2015, Val Loss: 2.0496
Epoch 10/100, Train Loss: 2.1701, Val Loss: 2.0227
Epoch 11/100, Train Loss: 2.1195, Val Loss: 2.0284
Epoch 12/100, Train Loss: 1.9986, Val Loss: 1.8701
Epoch 13/100, Train Loss: 1.8448, Val Loss: 1.7154
Epoch 14/100, Train Loss: 1.7159, Val Loss: 1.9001
Epoch 15/100, Train Loss: 1.5793, Val Loss: 1.4796
Epoch 16/100, Train Loss: 1.5302, Val Loss: 1.6513
Epoch 17/100, Train Loss: 1.6098, Val Loss: 1.4463
Epoch 18/100, Train Loss: 1.5149, Val Loss: 1.9061
Epoch 19/100, Train Loss: 1.5676, Val Loss: 1.4928
Epoch 20/100, Train Loss: 1.4407, Val Lo

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇██
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,██▇▅▅▄▅▄▄▄▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.38
minimax_loss_rate,0.62
minimax_score,-0.62
minimax_win_rate,0.0
random_draw_rate,0.07
random_loss_rate,0.09
random_score,0.75
random_win_rate,0.84
train_loss,0.78072


[34m[1mwandb[0m: Agent Starting Run: qnzytvjh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.007389485735414185
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.001990704081334658


Epoch 1/100, Train Loss: 13.0143, Val Loss: 4.3739
Epoch 2/100, Train Loss: 3.6614, Val Loss: 3.0576
Epoch 3/100, Train Loss: 2.9543, Val Loss: 2.7310
Epoch 4/100, Train Loss: 2.7254, Val Loss: 2.6007
Epoch 5/100, Train Loss: 2.5821, Val Loss: 2.4762
Epoch 6/100, Train Loss: 2.4746, Val Loss: 2.3916
Epoch 7/100, Train Loss: 2.3957, Val Loss: 2.3087
Epoch 8/100, Train Loss: 2.3751, Val Loss: 2.2809
Epoch 9/100, Train Loss: 2.3647, Val Loss: 2.3000
Epoch 10/100, Train Loss: 2.3446, Val Loss: 2.2537
Epoch 11/100, Train Loss: 2.3261, Val Loss: 2.2177
Epoch 12/100, Train Loss: 2.2493, Val Loss: 2.1509
Epoch 13/100, Train Loss: 2.2032, Val Loss: 2.1253
Epoch 14/100, Train Loss: 2.1966, Val Loss: 2.1260
Epoch 15/100, Train Loss: 2.1692, Val Loss: 2.0705
Epoch 16/100, Train Loss: 2.1548, Val Loss: 2.0548
Epoch 17/100, Train Loss: 2.1754, Val Loss: 2.2019
Epoch 18/100, Train Loss: 2.1994, Val Loss: 2.1752
Epoch 19/100, Train Loss: 2.1600, Val Loss: 2.0984
Epoch 20/100, Train Loss: 2.1150, Val L

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇███
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.44
minimax_loss_rate,0.56
minimax_score,-0.56
minimax_win_rate,0.0
random_draw_rate,0.13
random_loss_rate,0.07
random_score,0.73
random_win_rate,0.8
train_loss,0.78008


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5ridwn7i with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005106250254488596
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.007994626764514123


Epoch 1/100, Train Loss: 10.7387, Val Loss: 5.3953
Epoch 2/100, Train Loss: 3.9055, Val Loss: 3.2733
Epoch 3/100, Train Loss: 2.6786, Val Loss: 2.6055
Epoch 4/100, Train Loss: 2.4080, Val Loss: 2.4623
Epoch 5/100, Train Loss: 2.3586, Val Loss: 2.4027
Epoch 6/100, Train Loss: 2.3004, Val Loss: 2.4183
Epoch 7/100, Train Loss: 2.3066, Val Loss: 2.4017
Epoch 8/100, Train Loss: 2.2678, Val Loss: 2.3573
Epoch 9/100, Train Loss: 2.2443, Val Loss: 2.3405
Epoch 10/100, Train Loss: 2.2194, Val Loss: 2.3821
Epoch 11/100, Train Loss: 2.1273, Val Loss: 2.3290
Epoch 12/100, Train Loss: 2.0572, Val Loss: 2.2139
Epoch 13/100, Train Loss: 2.0522, Val Loss: 2.1501
Epoch 14/100, Train Loss: 1.9819, Val Loss: 2.0529
Epoch 15/100, Train Loss: 1.9530, Val Loss: 2.0616
Epoch 16/100, Train Loss: 1.9179, Val Loss: 2.0321
Epoch 17/100, Train Loss: 1.8635, Val Loss: 1.8318
Epoch 18/100, Train Loss: 1.7350, Val Loss: 1.5686
Epoch 19/100, Train Loss: 1.6695, Val Loss: 1.9522
Epoch 20/100, Train Loss: 1.7087, Val L

0,1
epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇████
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.27
minimax_loss_rate,0.73
minimax_score,-0.73
minimax_win_rate,0.0
random_draw_rate,0.05
random_loss_rate,0.11
random_score,0.73
random_win_rate,0.84
train_loss,0.81719


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7k5myfra with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008720537019602743
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0026425771192106744


Epoch 1/100, Train Loss: 11.6797, Val Loss: 4.8711
Epoch 2/100, Train Loss: 3.6080, Val Loss: 3.1803
Epoch 3/100, Train Loss: 2.9086, Val Loss: 2.8337
Epoch 4/100, Train Loss: 2.6532, Val Loss: 2.6302
Epoch 5/100, Train Loss: 2.5216, Val Loss: 2.4717
Epoch 6/100, Train Loss: 2.4047, Val Loss: 2.4097
Epoch 7/100, Train Loss: 2.3838, Val Loss: 2.3372
Epoch 8/100, Train Loss: 2.3037, Val Loss: 2.2873
Epoch 9/100, Train Loss: 2.2726, Val Loss: 2.2754
Epoch 10/100, Train Loss: 2.2697, Val Loss: 2.2487
Epoch 11/100, Train Loss: 2.2411, Val Loss: 2.2339
Epoch 12/100, Train Loss: 2.1914, Val Loss: 2.2078
Epoch 13/100, Train Loss: 2.1955, Val Loss: 2.2338
Epoch 14/100, Train Loss: 2.1507, Val Loss: 2.1332
Epoch 15/100, Train Loss: 2.1280, Val Loss: 2.1187
Epoch 16/100, Train Loss: 2.1251, Val Loss: 2.3554
Epoch 17/100, Train Loss: 2.2398, Val Loss: 2.1779
Epoch 18/100, Train Loss: 2.1291, Val Loss: 2.1827
Epoch 19/100, Train Loss: 2.1188, Val Loss: 2.1460
Epoch 20/100, Train Loss: 2.0992, Val L

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇███
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.39
minimax_loss_rate,0.61
minimax_score,-0.61
minimax_win_rate,0.0
random_draw_rate,0.11
random_loss_rate,0.1
random_score,0.69
random_win_rate,0.79
train_loss,0.81549


[34m[1mwandb[0m: Agent Starting Run: qlxq8byf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0019110599548890536
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0068961581608436035


Epoch 1/100, Train Loss: 6.4687, Val Loss: 3.0408
Epoch 2/100, Train Loss: 2.6940, Val Loss: 2.2717
Epoch 3/100, Train Loss: 2.4191, Val Loss: 2.2432
Epoch 4/100, Train Loss: 2.3922, Val Loss: 2.1893
Epoch 5/100, Train Loss: 2.3752, Val Loss: 2.2933
Epoch 6/100, Train Loss: 2.3065, Val Loss: 2.1845
Epoch 7/100, Train Loss: 2.2819, Val Loss: 2.1300
Epoch 8/100, Train Loss: 2.2370, Val Loss: 2.2536
Epoch 9/100, Train Loss: 2.2059, Val Loss: 2.0603
Epoch 10/100, Train Loss: 2.1681, Val Loss: 2.0633
Epoch 11/100, Train Loss: 2.1454, Val Loss: 2.0371
Epoch 12/100, Train Loss: 2.1566, Val Loss: 2.0385
Epoch 13/100, Train Loss: 2.1292, Val Loss: 2.0155
Epoch 14/100, Train Loss: 2.0860, Val Loss: 1.9875
Epoch 15/100, Train Loss: 2.1000, Val Loss: 2.0115
Epoch 16/100, Train Loss: 2.0819, Val Loss: 1.9840
Epoch 17/100, Train Loss: 2.0503, Val Loss: 1.9768
Epoch 18/100, Train Loss: 2.0537, Val Loss: 1.9559
Epoch 19/100, Train Loss: 2.0556, Val Loss: 1.9456
Epoch 20/100, Train Loss: 2.0275, Val Lo

0,1
epoch,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▃▃▃▃▃▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.32
minimax_loss_rate,0.68
minimax_score,-0.68
minimax_win_rate,0.0
random_draw_rate,0.06
random_loss_rate,0.12
random_score,0.7
random_win_rate,0.82
train_loss,0.78309


[34m[1mwandb[0m: Agent Starting Run: elccheot with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.007691465182645302
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.0026493562204257005


Epoch 1/100, Train Loss: 14.2158, Val Loss: 5.0924
Epoch 2/100, Train Loss: 4.4702, Val Loss: 4.1595
Epoch 3/100, Train Loss: 3.8616, Val Loss: 3.9302
Epoch 4/100, Train Loss: 3.6851, Val Loss: 3.7809
Epoch 5/100, Train Loss: 3.5559, Val Loss: 3.6565
Epoch 6/100, Train Loss: 3.4187, Val Loss: 3.5267
Epoch 7/100, Train Loss: 3.3171, Val Loss: 3.4003
Epoch 8/100, Train Loss: 3.2342, Val Loss: 3.3838
Epoch 9/100, Train Loss: 3.2053, Val Loss: 3.3524
Epoch 10/100, Train Loss: 3.1990, Val Loss: 3.2895
Epoch 11/100, Train Loss: 3.1509, Val Loss: 3.2606
Epoch 12/100, Train Loss: 3.1200, Val Loss: 3.1999
Epoch 13/100, Train Loss: 3.0799, Val Loss: 3.1953
Epoch 14/100, Train Loss: 3.0680, Val Loss: 3.1189
Epoch 15/100, Train Loss: 1.7612, Val Loss: 1.5328
Epoch 16/100, Train Loss: 1.4809, Val Loss: 1.4253
Epoch 17/100, Train Loss: 1.3863, Val Loss: 1.8713
Epoch 18/100, Train Loss: 1.4375, Val Loss: 1.4448
Epoch 19/100, Train Loss: 1.3769, Val Loss: 1.2919
Epoch 20/100, Train Loss: 1.3458, Val L

0,1
epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇█████
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▆▆▅▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.42
minimax_loss_rate,0.58
minimax_score,-0.58
minimax_win_rate,0.0
random_draw_rate,0.12
random_loss_rate,0.08
random_score,0.72
random_win_rate,0.8
train_loss,0.76977


[34m[1mwandb[0m: Agent Starting Run: l1g9daww with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008211528187836081
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.002701470765062942


Epoch 1/100, Train Loss: 10.3492, Val Loss: 3.5300
Epoch 2/100, Train Loss: 2.9449, Val Loss: 2.2571
Epoch 3/100, Train Loss: 2.2393, Val Loss: 2.1916
Epoch 4/100, Train Loss: 2.2781, Val Loss: 2.4430
Epoch 5/100, Train Loss: 2.1491, Val Loss: 1.8126
Epoch 6/100, Train Loss: 1.9042, Val Loss: 1.6946
Epoch 7/100, Train Loss: 1.9823, Val Loss: 2.1946
Epoch 8/100, Train Loss: 1.9588, Val Loss: 1.7051
Epoch 9/100, Train Loss: 1.7767, Val Loss: 2.4428
Epoch 10/100, Train Loss: 1.7208, Val Loss: 1.8070
Epoch 11/100, Train Loss: 1.8686, Val Loss: 1.6130
Epoch 12/100, Train Loss: 1.6354, Val Loss: 1.4431
Epoch 13/100, Train Loss: 1.4641, Val Loss: 1.3708
Epoch 14/100, Train Loss: 1.4504, Val Loss: 1.7718
Epoch 15/100, Train Loss: 1.6445, Val Loss: 1.7920
Epoch 16/100, Train Loss: 1.5052, Val Loss: 1.7109
Epoch 17/100, Train Loss: 1.5581, Val Loss: 1.3175
Epoch 18/100, Train Loss: 1.4158, Val Loss: 1.6078
Epoch 19/100, Train Loss: 1.3833, Val Loss: 1.3247
Epoch 20/100, Train Loss: 1.3041, Val L

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇██
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.36
minimax_loss_rate,0.64
minimax_score,-0.64
minimax_win_rate,0.0
random_draw_rate,0.11
random_loss_rate,0.11
random_score,0.67
random_win_rate,0.78
train_loss,0.87298


[34m[1mwandb[0m: Agent Starting Run: 1vx4k5r6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004938899878639627
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.005227006619051681


Epoch 1/100, Train Loss: 9.2560, Val Loss: 3.7246
Epoch 2/100, Train Loss: 2.9562, Val Loss: 2.5630
Epoch 3/100, Train Loss: 2.5048, Val Loss: 2.5525
Epoch 4/100, Train Loss: 2.4067, Val Loss: 2.4658
Epoch 5/100, Train Loss: 2.3297, Val Loss: 2.3991
Epoch 6/100, Train Loss: 2.2680, Val Loss: 2.4064
Epoch 7/100, Train Loss: 2.2802, Val Loss: 2.3091
Epoch 8/100, Train Loss: 2.2331, Val Loss: 2.3145
Epoch 9/100, Train Loss: 2.2019, Val Loss: 2.2319
Epoch 10/100, Train Loss: 2.1736, Val Loss: 2.2747
Epoch 11/100, Train Loss: 2.1491, Val Loss: 2.2042
Epoch 12/100, Train Loss: 2.1006, Val Loss: 2.1545
Epoch 13/100, Train Loss: 2.0693, Val Loss: 2.1373
Epoch 14/100, Train Loss: 2.0818, Val Loss: 2.1185
Epoch 15/100, Train Loss: 2.0592, Val Loss: 2.1113
Epoch 16/100, Train Loss: 2.0501, Val Loss: 2.1163
Epoch 17/100, Train Loss: 2.0578, Val Loss: 2.0843
Epoch 18/100, Train Loss: 2.0341, Val Loss: 2.1088
Epoch 19/100, Train Loss: 2.0632, Val Loss: 2.0964
Epoch 20/100, Train Loss: 2.0302, Val Lo

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.42
minimax_loss_rate,0.58
minimax_score,-0.58
minimax_win_rate,0.0
random_draw_rate,0.08
random_loss_rate,0.15
random_score,0.62
random_win_rate,0.77
train_loss,0.7794


[34m[1mwandb[0m: Agent Starting Run: iatpzvs4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	attention_layers: 2
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	feedforward_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009314506414361929
[34m[1mwandb[0m: 	mask_illegal_moves: False
[34m[1mwandb[0m: 	mask_value: -10
[34m[1mwandb[0m: 	norm_first: True
[34m[1mwandb[0m: 	weight_decay: 0.008564587216366352


Epoch 1/100, Train Loss: 11.0705, Val Loss: 3.8094
Epoch 2/100, Train Loss: 3.2318, Val Loss: 2.9071
Epoch 3/100, Train Loss: 2.7688, Val Loss: 2.6740
Epoch 4/100, Train Loss: 2.6211, Val Loss: 2.5917
Epoch 5/100, Train Loss: 2.5301, Val Loss: 2.5288
Epoch 6/100, Train Loss: 2.4512, Val Loss: 2.4905
Epoch 7/100, Train Loss: 2.4145, Val Loss: 2.4508
Epoch 8/100, Train Loss: 2.3718, Val Loss: 2.4364
Epoch 9/100, Train Loss: 2.3665, Val Loss: 2.4406
Epoch 10/100, Train Loss: 2.3735, Val Loss: 2.4477
Epoch 11/100, Train Loss: 2.4767, Val Loss: 2.3892
Epoch 12/100, Train Loss: 2.3481, Val Loss: 2.4120
Epoch 13/100, Train Loss: 2.2975, Val Loss: 2.3960
Epoch 14/100, Train Loss: 2.2553, Val Loss: 2.3768
Epoch 15/100, Train Loss: 2.2289, Val Loss: 2.3078
Epoch 16/100, Train Loss: 2.1832, Val Loss: 2.2432
Epoch 17/100, Train Loss: 2.1455, Val Loss: 2.2407
Epoch 18/100, Train Loss: 2.1329, Val Loss: 2.1885
Epoch 19/100, Train Loss: 2.1081, Val Loss: 2.2843
Epoch 20/100, Train Loss: 2.1207, Val L

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇█████
minimax_draw_rate,▁
minimax_loss_rate,▁
minimax_score,▁
minimax_win_rate,▁
random_draw_rate,▁
random_loss_rate,▁
random_score,▁
random_win_rate,▁
train_loss,▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅██████▇▅▅▅▅▅▅▅▅▅▃▂▂▂▁▁▁▁▁

0,1
epoch,99.0
minimax_draw_rate,0.31
minimax_loss_rate,0.69
minimax_score,-0.69
minimax_win_rate,0.0
random_draw_rate,0.1
random_loss_rate,0.16
random_score,0.58
random_win_rate,0.74
train_loss,0.91586
