In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

from bots.mcts_cnn_bots.tensors import df_to_tensors
from bots.mcts_cnn_bots.JassCNN import JassCNN

In [2]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [3]:
training_data = Path("./data/parquet/data.parquet")

In [4]:
df = pd.read_parquet(training_data)

In [5]:
def shuffle_data(df: pd.DataFrame, random_state=42):
    return df.sample(frac=1, random_state=random_state).reset_index(drop=True)

In [6]:
def split_data(state_tensors, policy_targets, value_targets, test_size=0.2, random_state=42):
    indices = np.arange(len(state_tensors))
    train_idx, test_idx = train_test_split(indices, test_size=test_size, random_state=random_state)

    train_dataset = TensorDataset(state_tensors[train_idx], policy_targets[train_idx], value_targets[train_idx])
    test_dataset = TensorDataset(state_tensors[test_idx], policy_targets[test_idx], value_targets[test_idx])

    return train_dataset, test_dataset

In [7]:
def train_model(model, train_loader, test_loader, epochs, lr):
    training_loss_value = []
    training_loss_policy = []
    validation_loss_value = []
    validation_loss_policy = []
    
    criterion_policy = nn.CrossEntropyLoss()
    criterion_value = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        # Training phase
        model.train()
        train_policy_loss = 0.0
        train_value_loss = 0.0

        for state_batch, policy_batch, value_batch in train_loader:
            state_batch = state_batch.to(device)
            policy_batch = torch.argmax(policy_batch, dim=1)  # Converts one-hot to class indices
            policy_batch = policy_batch.to(device)
            value_batch = value_batch.to(device)
            
            optimizer.zero_grad()

            # Forward pass
            policy_pred, value_pred = model(state_batch)
            policy_loss = criterion_policy(policy_pred, policy_batch)
            value_loss = criterion_value(value_pred, value_batch)
            total_loss = policy_loss + value_loss

            # Backward pass
            total_loss.backward()
            optimizer.step()

            train_policy_loss += policy_loss.item()
            train_value_loss += value_loss.item()
            
        training_loss_value.append(train_value_loss / len(train_loader))
        training_loss_policy.append(train_policy_loss / len(train_loader))

        # Validation phase
        model.eval()
        val_policy_loss = 0.0
        val_value_loss = 0.0

        with torch.no_grad():
            for state_batch, policy_batch, value_batch in test_loader:
                policy_batch = torch.argmax(policy_batch, dim=1)  # Converts one-hot to class indices
                policy_pred, value_pred = model(state_batch)
                val_policy_loss += criterion_policy(policy_pred, policy_batch).item()
                val_value_loss += criterion_value(value_pred, value_batch).item()
                
        validation_loss_value.append(val_value_loss / len(test_loader))
        validation_loss_policy.append(val_policy_loss / len(test_loader))

        print(
            f"Epoch {epoch+1}/{epochs}, "
            f"Train Policy Loss: {training_loss_policy[-1]:.4f}, Train Value Loss: {training_loss_value[-1]:.4f}, "
            f"Val Policy Loss: {validation_loss_policy[-1]:.4f}, Val Value Loss: {validation_loss_value[-1]:.4f}"
        )

In [8]:
df = shuffle_data(df, random_state=42)

In [9]:
state_tensors, policy_targets, value_targets = df_to_tensors(df)

In [10]:
train_state, test_state, train_policy, test_policy, train_value, test_value = train_test_split(
    state_tensors, policy_targets, value_targets, test_size=0.2, random_state=42
)

train_state = train_state.to(device)
train_policy = train_policy.to(device)
train_value = train_value.to(device)
test_state = test_state.to(device)
test_policy = test_policy.to(device)
test_value = test_value.to(device)

train_dataset = TensorDataset(train_state, train_policy, train_value)
test_dataset = TensorDataset(test_state, test_policy, test_value)

In [11]:
model = JassCNN().to(device)

In [12]:
BATCH_SIZE = 32
EPOCHS = 20
LEARNING_RATE = 0.001

In [13]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

train_model(model, train_loader, test_loader, epochs=EPOCHS, lr=LEARNING_RATE)

Epoch 1/20, Train Policy Loss: 3.5831, Train Value Loss: 0.0668, Val Policy Loss: 3.5846, Val Value Loss: 0.0402
Epoch 2/20, Train Policy Loss: 3.5811, Train Value Loss: 0.0427, Val Policy Loss: 3.5843, Val Value Loss: 0.0373
Epoch 3/20, Train Policy Loss: 3.5768, Train Value Loss: 0.0390, Val Policy Loss: 3.5850, Val Value Loss: 0.0345
Epoch 4/20, Train Policy Loss: 3.5764, Train Value Loss: 0.0375, Val Policy Loss: 3.5879, Val Value Loss: 0.0359
Epoch 5/20, Train Policy Loss: 3.5744, Train Value Loss: 0.0348, Val Policy Loss: 3.5861, Val Value Loss: 0.0370
Epoch 6/20, Train Policy Loss: 3.5693, Train Value Loss: 0.0360, Val Policy Loss: 3.5877, Val Value Loss: 0.0376
Epoch 7/20, Train Policy Loss: 3.5708, Train Value Loss: 0.0361, Val Policy Loss: 3.5877, Val Value Loss: 0.0379
Epoch 8/20, Train Policy Loss: 3.5678, Train Value Loss: 0.0352, Val Policy Loss: 3.5870, Val Value Loss: 0.0404
Epoch 9/20, Train Policy Loss: 3.5633, Train Value Loss: 0.0327, Val Policy Loss: 3.5873, Val Va