In [None]:
import chess

def moves_to_mate(fen, solution_moves):
    """Replay a UCI move sequence and return the mate distance it realises.

    Args:
        fen: FEN string of the starting position.
        solution_moves: Iterable of moves in UCI notation (e.g. ``"e2e4"``),
            alternating between the two sides starting with the side to move.

    Returns:
        The number of moves played by the mating side (a mate-in-1 returns 1),
        or 0 when any move is malformed or illegal, or when the final position
        is not checkmate.
    """
    board = chess.Board(fen)
    # Plies played by each colour, keyed by chess.WHITE (True) / chess.BLACK (False).
    plies_by_color = {chess.WHITE: 0, chess.BLACK: 0}

    for uci in solution_moves:
        try:
            move = chess.Move.from_uci(uci)
        except ValueError:
            return 0  # malformed UCI string, not a valid mate sequence
        if move not in board.legal_moves:
            return 0  # illegal move, not a valid mate sequence
        # Credit the ply to the side that is about to move, then play it.
        plies_by_color[board.turn] += 1
        board.push(move)

    if not board.is_checkmate():
        return 0

    # After checkmate the side to move is the mated one, so the mating side is
    # the opposite colour. Counting that side's plies (rather than completed
    # full moves) makes the final, mating ply count for White as well as Black.
    return plies_by_color[not board.turn]

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import chess
# Utility: convert FEN to tensor with positional features
def fen_to_tensor(fen):
    """Encode a FEN position as a float32 tensor of shape ``(23, 8, 8)``.

    Row 0 of every plane is rank 8 and column 0 is the a-file.

    Channel layout:
        0-5    white P, N, B, R, Q, K occupancy
        6-11   black P, N, B, R, Q, K occupancy
        12     side to move (all 1.0 when White is to move, else 0.0)
        13     squares attacked by White
        14     squares attacked by Black
        15     destination squares of the side-to-move's legal moves
        16     distance (``chess.square_distance``) to the white king
        17     distance (``chess.square_distance``) to the black king
        18     all 1.0 when the side to move is in check
        19     pieces (either colour) pinned to their own king
        20     squares with more white than black attackers
        21     squares with more black than white attackers
        22     destination squares of legal moves that give check

    Args:
        fen: FEN string describing the position.

    Returns:
        ``torch.Tensor`` with 12 + 1 + 10 = 23 channels.
    """
    board = chess.Board(fen)

    def row_col(sq):
        # python-chess numbers squares a1=0 .. h8=63; flip ranks so row 0 is rank 8.
        return 7 - (sq // 8), sq % 8

    def blank():
        return np.zeros((8, 8), dtype=np.float32)

    # --- Piece occupancy: 12 channels ---
    planes = np.zeros((12, 8, 8), dtype=np.float32)
    pinned = blank()
    for sq, piece in board.piece_map().items():
        r, c = row_col(sq)
        color_offset = 0 if piece.color == chess.WHITE else 6
        # piece_type runs PAWN=1 .. KING=6, matching the P,N,B,R,Q,K order.
        planes[piece.piece_type - 1 + color_offset, r, c] = 1
        if board.is_pinned(piece.color, sq):
            pinned[r, c] = 1

    # --- Side to move: 1 channel ---
    stm_plane = np.full((1, 8, 8), float(board.turn), dtype=np.float32)

    # --- Per-square features computed in a single pass over the board ---
    attack_w, attack_b = blank(), blank()
    controlled_white, controlled_black = blank(), blank()
    dist_wk, dist_bk = blank(), blank()
    wksq = board.king(chess.WHITE)
    bksq = board.king(chess.BLACK)
    for sq in chess.SQUARES:
        r, c = row_col(sq)
        white_attackers = len(board.attackers(chess.WHITE, sq))
        black_attackers = len(board.attackers(chess.BLACK, sq))
        if white_attackers:
            attack_w[r, c] = 1
        if black_attackers:
            attack_b[r, c] = 1
        # "Control" = strictly more attackers than the opponent.
        if white_attackers > black_attackers:
            controlled_white[r, c] = 1.0
        elif black_attackers > white_attackers:
            controlled_black[r, c] = 1.0
        # Distance planes stay all-zero when the corresponding king is absent.
        if wksq is not None:
            dist_wk[r, c] = chess.square_distance(sq, wksq)
        if bksq is not None:
            dist_bk[r, c] = chess.square_distance(sq, bksq)

    # --- Legal-move features: one pass over the legal moves ---
    legal_mask = blank()
    checking_moves_mask = blank()
    for mv in board.legal_moves:
        r, c = row_col(mv.to_square)
        legal_mask[r, c] = 1
        if board.gives_check(mv):
            checking_moves_mask[r, c] = 1

    # --- Check status: 1 channel ---
    check_pl = np.full((8, 8), float(board.is_check()), dtype=np.float32)

    # Stack all planes: 12 + 1 + (2 + 1 + 2 + 1 + 1 + 2 + 1) = 23 channels.
    # The order below defines the channel layout documented above.
    extra = [attack_w, attack_b, legal_mask, dist_wk, dist_bk, check_pl, pinned,
             controlled_white, controlled_black, checking_moves_mask]
    feature_planes = np.stack(extra, axis=0)
    all_planes = np.concatenate([planes, stm_plane, feature_planes], axis=0)

    return torch.from_numpy(all_planes)


In [20]:
# --- Hyperparameters & Constants ---
EPOCHS = 1                # Number of epochs to train
BATCH_SIZE = 512          # Number of samples per batch
MAX_BATCHES_PER_EPOCH = 600  # Process exactly 600 batches per epoch
LR = 1e-2                 # Learning rate
BINARY_CLASSES = 2        # Number of classes for binary classification
DATA_PATH = 'data/trainingpuzzles.csv'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Use more workers but be mindful of CPU limits.
# NOTE: no DataLoader is created here. The previous placeholder
# `DataLoader(..., batch_size=32, num_workers=4, pin_memory=False)` wrapped the
# literal Ellipsis object instead of a dataset and used a batch size that
# disagreed with BATCH_SIZE. The real loaders are built in the data-loading
# cell once the dataset exists.

# Limit PyTorch CPU threads
torch.set_num_threads(4)

# Precompute fen tensors once outside of training
# Save processed tensors to disk, then load quickly during training

class NumberMateCNN(nn.Module):
    """CNN regressor that maps a 23-channel board tensor to one value in [0, 1].

    The two 2x2 max-pools reduce an 8x8 board to 2x2, which is what the
    ``128 * 2 * 2`` input size of the first linear layer relies on.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.Conv2d(23, 32, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        head_layers = [
            nn.Flatten(),
            nn.Linear(128 * 2 * 2, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return one sigmoid-squashed score per sample (trailing dim removed)."""
        features = self.conv(x)
        raw_score = self.fc(features)
        # Drop the trailing singleton dimension, then squash into [0, 1].
        return torch.sigmoid(raw_score.squeeze(-1))

# Binary classification model
class IsMateCNN(nn.Module):
    """CNN classifier that maps a 23-channel board tensor to class logits.

    Emits ``BINARY_CLASSES`` raw logits per sample (no softmax applied). The two
    2x2 max-pools reduce an 8x8 board to 2x2, which is what the ``128 * 2 * 2``
    input size of the first linear layer relies on.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.Conv2d(23, 32, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        head_layers = [
            nn.Flatten(),
            nn.Linear(128 * 2 * 2, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, BINARY_CLASSES),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return unnormalised class logits for each sample in the batch."""
        features = self.conv(x)
        return self.fc(features)

import json

def train_binary(model, train_loader, val_loader, device, save_path='binary_model.pt',
                 max_batches=600, epochs=None, lr=None):
    """Train a classifier with cross-entropy, validating after every epoch.

    Writes a checkpoint ``<base>_epoch<N>.pt`` after each epoch and the metric
    history to ``<base>_metrics.json`` once training finishes, where ``<base>``
    is ``save_path`` with a trailing ``.pt`` removed.

    Args:
        model: Module returning class logits of shape ``(batch, classes)``.
        train_loader: Iterable of ``(inputs, integer class labels)`` batches.
        val_loader: Iterable of ``(inputs, integer class labels)`` batches.
        device: Device the model and batches are moved to.
        save_path: Base path for checkpoints and the metrics file.
        max_batches: Maximum number of training batches consumed per epoch.
        epochs: Number of epochs; defaults to the module-level ``EPOCHS``.
        lr: Adam learning rate; defaults to the module-level ``LR``.

    Returns:
        Dict with per-epoch lists ``train_loss``, ``val_loss`` and
        ``val_accuracy`` (the same content written to the metrics file).
    """
    n_epochs = EPOCHS if epochs is None else epochs
    learning_rate = LR if lr is None else lr

    # Strip a trailing ".pt" so the default does not produce names such as
    # "binary_model.pt_epoch1.pt" or "binary_model.pt_metrics.json".
    base_path = str(save_path)
    if base_path.endswith('.pt'):
        base_path = base_path[:-3]

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.to(device)

    metrics = {'train_loss': [], 'val_loss': [], 'val_accuracy': []}

    for epoch in range(n_epochs):
        # --- Training ---
        model.train()
        total_loss = 0.0
        samples_seen = 0
        for batch_idx, (x, y) in enumerate(train_loader):
            if batch_idx >= max_batches:
                break
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, y)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; re-weight by batch size so the
            # epoch average is per-sample even when the last batch is smaller.
            total_loss += loss.item() * x.size(0)
            samples_seen += x.size(0)

        # NaN (rather than ZeroDivisionError) when no sample was processed.
        avg_train_loss = total_loss / samples_seen if samples_seen else float('nan')
        metrics['train_loss'].append(avg_train_loss)

        # --- Validation ---
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                logits = model(x_val)
                loss = criterion(logits, y_val)
                val_loss += loss.item() * x_val.size(0)
                preds = logits.argmax(dim=1)
                correct += (preds == y_val).sum().item()
                total += y_val.size(0)

        # An empty validation set must not discard the epoch that was just
        # trained, so report NaN and still save the checkpoint below.
        avg_val_loss = val_loss / total if total else float('nan')
        val_accuracy = correct / total if total else float('nan')
        metrics['val_loss'].append(avg_val_loss)
        metrics['val_accuracy'].append(val_accuracy)

        print(f"Epoch {epoch+1} Train Loss: {avg_train_loss:.4f} Val Loss: {avg_val_loss:.4f} Val Acc: {val_accuracy:.4f}")

        torch.save(model.state_dict(), f"{base_path}_epoch{epoch+1}.pt")

    # Save metrics to JSON file
    with open(f"{base_path}_metrics.json", 'w') as f:
        json.dump(metrics, f)

    return metrics

def validate_distance(model, loader, device, criterion):
    """Return the per-sample average loss of ``model`` over ``loader``.

    Each batch is ``(inputs, mate_moves)``. The regression target is the
    reciprocal of the mate distance, with 0 used wherever ``mate_moves`` is not
    positive. Gradients are disabled and the model is switched to eval mode.
    """
    model.eval()
    weighted_loss_sum = 0
    n_samples = 0
    with torch.no_grad():
        for inputs, mate_moves in loader:
            inputs = inputs.to(device)
            mate_moves = mate_moves.to(device)
            # Reciprocal target; entries with no mate (<= 0) map to 0.
            no_mate = torch.zeros_like(mate_moves)
            target = torch.where(mate_moves > 0, 1.0 / mate_moves, no_mate)
            batch_size = inputs.size(0)
            batch_loss = criterion(model(inputs), target).item()
            # Weight the batch loss by its size before accumulating.
            weighted_loss_sum += batch_loss * batch_size
            n_samples += batch_size
    return weighted_loss_sum / n_samples


import json
import torch
import torch.nn as nn
import torch.optim as optim

def train_distance(model, train_loader, val_loader, device, save_path='distance_model.pt',
                   epochs=30, lr=None, error_threshold=1):
    """Train a mate-distance regressor on reciprocal targets with MSE loss.

    The target for each sample is ``1 / mate_moves`` (0 when there is no mate,
    i.e. ``mate_moves <= 0``). After every epoch the model is validated and a
    checkpoint ``<base>_epoch<N>.pt`` is written; the metric history goes to
    ``<base>_metrics.json`` at the end, where ``<base>`` is ``save_path`` with a
    trailing ``.pt`` removed.

    Args:
        model: Module returning one prediction per sample, shape ``(batch,)``.
        train_loader: Iterable of ``(inputs, mate_moves)`` batches.
        val_loader: Iterable of ``(inputs, mate_moves)`` batches.
        device: Device the model and batches are moved to.
        save_path: Base path for checkpoints and the metrics file.
        epochs: Number of training epochs.
        lr: Adam learning rate; defaults to the module-level ``LR``.
        error_threshold: Maximum absolute error (in moves) for a validation
            prediction to count as correct.

    Returns:
        Dict with per-epoch lists ``train_loss``, ``val_loss``,
        ``val_accuracy`` and ``val_mean_abs_error``.
    """
    learning_rate = LR if lr is None else lr

    # Strip a trailing ".pt" so the default does not produce names such as
    # "distance_model.pt_epoch1.pt".
    base_path = str(save_path)
    if base_path.endswith('.pt'):
        base_path = base_path[:-3]

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.to(device)

    metrics = {
        'train_loss': [],
        'val_loss': [],
        'val_accuracy': [],
        'val_mean_abs_error': [],
    }

    for epoch in range(epochs):
        # --- Training ---
        model.train()
        total_train_loss = 0.0
        train_samples = 0
        for x, t in train_loader:
            x, t = x.to(device), t.to(device)
            # target: inverse mate distance (1/mate_moves), 0 if no mate
            t_recip = torch.where(t > 0, 1.0 / t, torch.zeros_like(t))
            optimizer.zero_grad()
            pred = model(x)
            loss = criterion(pred, t_recip)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item() * x.size(0)
            train_samples += x.size(0)
        # Divide by the samples actually processed instead of
        # len(loader.dataset): that stays correct with drop_last=True and also
        # works for plain iterables of batches.
        avg_train_loss = total_train_loss / train_samples if train_samples else float('nan')
        metrics['train_loss'].append(avg_train_loss)

        # --- Validation ---
        model.eval()
        val_loss = 0.0
        abs_error_sum = 0.0
        correct_within_threshold = 0
        total = 0

        with torch.no_grad():
            for x_val, t_val in val_loader:
                x_val, t_val = x_val.to(device), t_val.to(device)
                t_recip_val = torch.where(t_val > 0, 1.0 / t_val, torch.zeros_like(t_val))
                pred_val = model(x_val)

                loss_val = criterion(pred_val, t_recip_val)
                val_loss += loss_val.item() * x_val.size(0)

                # Convert predictions back to mate moves.
                # NOTE(review): for "no mate" samples (t == 0) a good prediction
                # is a reciprocal near 0, which decodes to a very large move
                # count, so those samples inflate the move-space error metrics.
                # Consider evaluating them separately.
                pred_moves = torch.where(pred_val > 0, 1.0 / pred_val, torch.full_like(pred_val, 1000.0))
                abs_move_errors = torch.abs(pred_moves - t_val)

                abs_error_sum += abs_move_errors.sum().item()
                correct_within_threshold += (abs_move_errors <= error_threshold).sum().item()
                total += t_val.size(0)

        # NaN (rather than ZeroDivisionError) when the validation set is empty,
        # so the epoch's checkpoint is still written.
        avg_val_loss = val_loss / total if total else float('nan')
        mean_abs_error = abs_error_sum / total if total else float('nan')
        val_accuracy = correct_within_threshold / total if total else float('nan')
        metrics['val_loss'].append(avg_val_loss)
        metrics['val_mean_abs_error'].append(mean_abs_error)
        metrics['val_accuracy'].append(val_accuracy)

        print(f"Epoch {epoch+1}: Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}, Val Acc={val_accuracy:.4f}, Mean Abs Error={mean_abs_error:.4f} moves")

        torch.save(model.state_dict(), f"{base_path}_epoch{epoch+1}.pt")

    # Save metrics to JSON
    with open(f"{base_path}_metrics.json", 'w') as f:
        json.dump(metrics, f)

    return metrics





In [21]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split

# --- Constants ---
# Training schedule and optimiser settings.
EPOCHS, BATCH_SIZE = 1, 512
LR = 0.01
# Size of the classifier's output layer.
BINARY_CLASSES = 2
# Puzzle CSV read by the dataset class.
DATA_PATH = 'data/trainingpuzzles.csv'
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Dataset class (uses your fen_to_tensor and parses labels accordingly) ---
class ChessPuzzleBinaryDataset(Dataset):
    """Puzzle dataset yielding ``(board_tensor, label)`` pairs from a CSV file.

    The CSV must provide ``FEN`` and ``Moves`` columns. ``task`` selects the
    label and may be switched after construction:

    * ``'binary'``   -> ``torch.long`` 1 if the move text contains ``#``, else 0.
    * anything else -> ``torch.float32`` mate distance in moves (0 = no mate).

    NOTE(review): both labels rely on a ``#`` marker in ``Moves``. If the CSV
    stores plain UCI moves (no ``#``), every label will be 0 - TODO confirm
    against the data file.
    """

    def __init__(self, csv_file, task='binary'):
        self.df = pd.read_csv(csv_file)
        self.task = task  # 'binary' or 'distance'

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _mate_distance(moves):
        """Return how many moves the mating side plays up to the ``#`` move.

        Move-number prefixes such as ``1.`` or ``1...`` are ignored. The mating
        side's plies are the mating ply and every second ply before it, so the
        count is ``index // 2 + 1`` regardless of which side moved first.
        Returns 0 when ``moves`` is not a string or contains no ``#`` move.
        """
        import re

        if not isinstance(moves, str):
            return 0  # e.g. NaN for an empty CSV cell
        plies = []
        for token in moves.split():
            ply = re.sub(r'^\d+\.+', '', token)
            if ply:
                plies.append(ply)
        for ply_index, ply in enumerate(plies):
            if '#' in ply:
                return ply_index // 2 + 1
        return 0

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        moves = row['Moves']
        x = fen_to_tensor(row['FEN'])

        if self.task == 'binary':
            # Label: 1 if it's a mate puzzle (contains "#"), else 0.
            # Non-string cells (e.g. NaN) are treated as "no mate".
            is_mate = int(isinstance(moves, str) and '#' in moves)
            y = torch.tensor(is_mate, dtype=torch.long)
        else:
            # Distance: number of moves to mate, e.g. "1. Qh5#" -> 1.
            # (Previously the last character of the mating move was parsed,
            # which yielded the destination rank - 5 for "Qh5#".)
            y = torch.tensor(self._mate_distance(moves), dtype=torch.float32)
        return x, y

# --- Load and split dataset ---
full_ds = ChessPuzzleBinaryDataset(DATA_PATH, task='binary')
indices = list(range(len(full_ds)))
train_idx, val_idx = train_test_split(indices, test_size=0.1, random_state=42)

train_loader = DataLoader(Subset(full_ds, train_idx), batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader   = DataLoader(Subset(full_ds, val_idx), batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# --- Train binary classification model ---
binary_model = IsMateCNN()
train_binary(binary_model, train_loader, val_loader, DEVICE, save_path='binary_model')

# --- Switch dataset mode to distance regression ---
# The Subsets share `full_ds`, so flipping `task` changes the labels they yield.
# Both loaders are rebuilt so neither one outlives the task switch.
full_ds.task = 'distance'
train_loader = DataLoader(Subset(full_ds, train_idx), batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader   = DataLoader(Subset(full_ds, val_idx), batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# --- Train mate-distance regression model ---
# Argument order is (model, train_loader, val_loader, device); the loaders were
# previously passed swapped, which trained on the validation split.
distance_model = NumberMateCNN()
train_distance(distance_model, train_loader, val_loader, DEVICE, save_path='distance_model')




Epoch 1 Train Loss: 0.0012 Val Loss: 0.0000 Val Acc: 1.0000


TypeError: train_distance() missing 1 required positional argument: 'device'