In [2]:
import chess
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import os

# Channel layout of the 12-plane board encoding, keyed by (piece_type, color).
# Planes 0-5 hold the color=True pieces in the order pawn, knight, bishop,
# rook, queen, king; planes 6-11 hold the color=False pieces in the same order.
piece_to_channel = {
    (piece_type, color): base + offset
    for color, base in ((True, 0), (False, 6))
    for offset, piece_type in enumerate(
        (chess.PAWN, chess.KNIGHT, chess.BISHOP, chess.ROOK, chess.QUEEN, chess.KING)
    )
}

def board_to_tensor(board):
    """Encode a python-chess board as a one-hot float32 tensor of shape (12, 8, 8).

    Axis 0 is the piece channel looked up in ``piece_to_channel``; axis 1 is
    the rank index and axis 2 the file index of the occupied square.
    """
    planes = np.zeros((12, 8, 8), dtype=np.float32)
    # piece_map() yields only occupied squares, so no None check is needed.
    for square, piece in board.piece_map().items():
        plane = piece_to_channel[(piece.piece_type, piece.color)]
        planes[plane, chess.square_rank(square), chess.square_file(square)] = 1.0
    return torch.from_numpy(planes)

class ChessNet(nn.Module):
    """
    Compact convolutional evaluator for chess positions.

    Input: batch of board encodings, shape (N, 12, 8, 8).
    Output: (N, 1) tensor squashed by tanh into [-1, 1], read as the
    evaluation from White's perspective.
    """

    def __init__(self):
        super().__init__()
        # A 3x3 convolution with padding 1 keeps the 8x8 spatial size.
        self.conv = nn.Conv2d(12, 32, kernel_size=3, padding=1)
        # Linear head over the flattened 32 x 8 x 8 feature map.
        self.fc = nn.Linear(32 * 8 * 8, 1)

    def forward(self, x):
        features = self.conv(x).relu()
        # Collapse (channels, rank, file) into one feature vector per sample.
        flat = features.view(len(features), -1)
        return self.fc(flat).tanh()

def select_move(board, net, epsilon=0.5):
    """
    Pick a move with epsilon-greedy one-move lookahead.

    With probability ``epsilon`` a uniformly random legal move is returned.
    Otherwise every legal move is played on a scratch copy of the board, all
    resulting positions are scored by ``net`` in a single batched forward
    pass, and the move with the best score for the side to move is returned:
    the highest value when White is to move, the lowest when Black is to move
    (the network's output is read as White's evaluation). Ties go to the
    first such move in ``board.legal_moves`` order.

    Args:
        board: python-chess board; it is not modified.
        net: callable mapping an (N, 12, 8, 8) float tensor to (N, 1) values.
        epsilon: exploration probability.

    Returns:
        The selected move.

    Raises:
        IndexError: if ``board`` has no legal moves (the same exception type
            ``random.choice`` raised for an empty move list before).
    """
    legal_moves = list(board.legal_moves)
    if not legal_moves:
        raise IndexError("select_move called on a position with no legal moves")
    if random.random() < epsilon:
        return random.choice(legal_moves)

    successors = []
    for move in legal_moves:
        # board_to_tensor only reads piece placement, so the (ever-growing)
        # move stack does not need to be copied for every candidate.
        scratch = board.copy(stack=False)
        scratch.push(move)
        successors.append(board_to_tensor(scratch))

    # One forward pass over all candidate positions instead of one per move.
    with torch.no_grad():
        values = net(torch.stack(successors)).reshape(-1)

    # White (board.turn is True) wants the highest value, Black the lowest.
    # argmax/argmin return the first extremal index, matching the strict
    # comparisons of a first-wins linear scan.
    best_index = torch.argmax(values) if board.turn else torch.argmin(values)
    return legal_moves[int(best_index)]

def simulate_game(net, epsilon=0.5):
    """
    Play one self-play game from the starting position.

    Every position in which a move was chosen is recorded and labelled with
    the final result. Labels are always from White's perspective, which is the
    convention ChessNet's output and select_move use, regardless of which side
    was to move in that position.

    Args:
        net: evaluation network passed to select_move.
        epsilon: exploration probability passed to select_move.

    Returns:
        states: list of board tensors (each shape (12, 8, 8))
        targets: list with one entry per state, each equal to ``outcome``
        outcome: final game outcome (1 for White win, -1 for Black win, 0 for draw)
    """
    board = chess.Board()
    states = []

    while not board.is_game_over():
        states.append(board_to_tensor(board))
        board.push(select_move(board, net, epsilon))

    # Any result string other than a decisive one is counted as a draw.
    outcome = {"1-0": 1, "0-1": -1}.get(board.result(), 0)

    # The board encoding has no side-to-move plane and select_move reads the
    # network output as White's evaluation (White maximises, Black minimises).
    # Flipping the sign on Black's turns would therefore train the network
    # towards the opposite of what move selection assumes, so every state
    # gets the White-perspective outcome.
    targets = [outcome] * len(states)

    return states, targets, outcome

def self_play_training(num_games=50000, net=None, epsilon=0.5,
                       log_every=1000, save_path='chess_net.pth'):
    """
    Run sequential self-play training, with one optimizer step per game.

    Each game is simulated with the current network, then the network is
    fitted (MSE loss, Adam with lr=0.001) to that game's states and targets as
    a single batch. Every ``log_every`` games, and once more after the final
    game, a summary line is printed and the network is saved to ``save_path``.

    Args:
        num_games: number of self-play games to run.
        net: network to continue training; a fresh ChessNet is created if None.
        epsilon: exploration probability forwarded to simulate_game.
        log_every: number of games between progress logs / checkpoints (>= 1).
        save_path: checkpoint destination handed to save_net.

    Returns:
        The trained network (the same object as ``net`` when one was given).

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be at least 1")
    if net is None:
        net = ChessNet()
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    # The loss module is stateless, so build it once rather than every game.
    loss_fn = nn.MSELoss()

    # Tally keyed by outcome: 1 = White win, -1 = Black win, 0 = draw.
    # The log line below labels these "Wins", "Losses" and "Draws".
    game_results = {1: 0, -1: 0, 0: 0}

    for game in range(1, num_games + 1):
        states, targets, outcome = simulate_game(net, epsilon)
        game_results[outcome] += 1

        # Prepare training data.
        states_tensor = torch.stack(states)  # shape: (N, 12, 8, 8)
        # unsqueeze(1) gives shape (N, 1), matching the network output.
        targets_tensor = torch.tensor(targets, dtype=torch.float32).unsqueeze(1)

        optimizer.zero_grad()
        predictions = net(states_tensor)
        loss = loss_fn(predictions, targets_tensor)
        loss.backward()
        optimizer.step()

        # Log and checkpoint on the regular interval and after the last game,
        # so a run whose length is not a multiple of log_every still reports
        # and keeps its trailing progress.
        if game % log_every == 0 or game == num_games:
            print(f"Game {game}: Outcome: {outcome}, Loss: {loss.item():.4f} | Wins: {game_results[1]}, Losses: {game_results[-1]}, Draws: {game_results[0]}")
            save_net(net, save_path)

    return net

# Checkpoint helpers: persist and restore the network's parameters.
def save_net(net, path):
    """Write ``net``'s state_dict to ``path`` and print a confirmation line."""
    state = net.state_dict()
    torch.save(state, path)
    print(f"Network saved to {path}")

def load_net(path):
    """
    Build a fresh ChessNet and fill it with the parameters stored at ``path``.

    Args:
        path: location of a state_dict file such as one written by save_net.

    Returns:
        The ChessNet with the loaded parameters.
    """
    net = ChessNet()
    # map_location="cpu" lets a checkpoint written from CUDA tensors load on a
    # CPU-only machine; load_state_dict copies the values into the network's
    # own parameters either way.
    # NOTE(security): torch.load is pickle-based. Only load checkpoints you
    # trust, or pass weights_only=True where the installed torch supports it.
    state = torch.load(path, map_location="cpu")
    net.load_state_dict(state)
    print(f"Network loaded from {path}")
    return net




In [8]:
# --------------------------
# Example notebook session: choose the run settings and resume from a
# saved checkpoint when one is present.
# --------------------------
num_games = 1000
epsilon = 0.1

# Start from the cached weights if the file exists; otherwise leave net as
# None so self_play_training builds a fresh network.
net = load_net('chess_net.pth') if os.path.exists('chess_net.pth') else None

Network loaded from chess_net.pth


In [9]:
# Train by self-play using the settings and (optional) network chosen above.
trained_net = self_play_training(
    num_games=num_games,
    net=net,
    epsilon=epsilon,
)

Game 1000: Outcome: 0, Loss: 0.0000 | Wins: 6, Losses: 5, Draws: 989
Network saved to chess_net.pth
