# 🧠 Hybrid Neural Chess Engine

### Learning from Chess.com (Hikaru Nakamura) + Self-Play Reinforcement

## ✅ SECTION 0 — Setup (Colab Compatible)

In [None]:
!pip -q install python-chess torch torchvision requests tqdm

import os
import io
import re
import json
import random
import requests
import numpy as np
from tqdm.auto import tqdm

import chess
import chess.pgn

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Fix the seeds of Python's `random`, NumPy and PyTorch so that sampling and
# weight initialisation are repeatable from run to run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


## 📌 SECTION 1 — BUSINESS UNDERSTANDING

This notebook shows a complete **hybrid chess-learning workflow**:

- **Imitation learning** from real Chess.com games (e.g., Hikaru).
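- **Self-play reinforcement** for additional policy refinement (Section 7 is a simplified version: it plays games with the policy and records a reward, but does not yet update the weights).
- **Policy + Value** models for move selection and board evaluation (only the policy model is trained in Section 6; the value model is defined and instantiated but not trained here).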
- **CNN + Transformer** architecture for spatial + contextual reasoning.

## 🌐 SECTION 2 — DOWNLOAD REAL DATA FROM CHESS.COM

In [None]:
def get_chesscom_archives(username: str, user_agent: str = "hybrid-neural-chess-notebook/1.0"):
    """Return the monthly game-archive URLs published for a Chess.com player.

    Args:
        username: Chess.com username, interpolated into the API URL as given.
        user_agent: Value sent in the ``User-Agent`` request header. The
            Chess.com public API is reported to answer 403 to requests without
            an identifying user agent, so one is always sent; callers may
            override it (e.g. to include contact details).

    Returns:
        The list stored under the ``"archives"`` key of the JSON response, or
        an empty list when that key is absent.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    url = f"https://api.chess.com/pub/player/{username}/games/archives"
    response = requests.get(url, headers={"User-Agent": user_agent}, timeout=30)
    response.raise_for_status()
    return response.json().get("archives", [])


def download_chesscom_pgn(
    username: str,
    max_archives: int = 12,
    out_path: str = "hikaru_chesscom.pgn",
    user_agent: str = "hybrid-neural-chess-notebook/1.0",
):
    """Download a player's most recent monthly PGN archives into one file.

    Args:
        username: Chess.com username whose archives are fetched.
        max_archives: Maximum number of monthly archives to download, counted
            from the end of the archive list returned by the API.
        out_path: Path of the PGN file to write (UTF-8).
        user_agent: Value sent in the ``User-Agent`` header of each PGN
            request; the Chess.com API is reported to reject requests that
            do not identify themselves.

    Returns:
        A tuple ``(out_path, number_of_months_written)``.

    Raises:
        ValueError: If the player has no archives, or if none of the selected
            archives yielded any PGN text.
    """
    archives = get_chesscom_archives(username)
    # Reverse the API's list so the last entries come first, then truncate.
    archives = archives[::-1][:max_archives]

    if not archives:
        raise ValueError(f"No archives found for user: {username}")

    headers = {"User-Agent": user_agent}
    all_pgn_chunks = []
    for archive_url in tqdm(archives, desc="Downloading monthly PGNs"):
        r = requests.get(archive_url + "/pgn", headers=headers, timeout=60)
        # Best effort: months that fail or come back empty are skipped.
        text = r.text.strip() if r.status_code == 200 else ""
        if text:
            all_pgn_chunks.append(text)

    if not all_pgn_chunks:
        raise ValueError("Could not download any PGN data from Chess.com archives.")

    with open(out_path, "w", encoding="utf-8") as f:
        # A blank line between monthly chunks keeps consecutive games separated.
        f.write("\n\n".join(all_pgn_chunks))

    return out_path, len(all_pgn_chunks)


# Example: fetch up to 12 of Hikaru's monthly archives (taken from the end of
# the archive list) into a single PGN file. This performs network requests and
# writes `hikaru_chesscom.pgn` in the working directory.
pgn_path, months_downloaded = download_chesscom_pgn("hikaru", max_archives=12, out_path="hikaru_chesscom.pgn")
print(f"Saved PGN to: {pgn_path} | months downloaded: {months_downloaded}")


## 📊 SECTION 3 — DATA PREPARATION

In [None]:
def board_to_tensor(board: chess.Board) -> torch.Tensor:
    """Encode the pieces on *board* as a float32 tensor of shape (12, 8, 8).

    Each occupied square sets a single 1.0 in the plane selected by
    ``piece_type - 1``, offset by 6 for non-white pieces, so planes 0-5 hold
    White and planes 6-11 hold Black. The row index is
    ``7 - chess.square_rank(square)`` and the column index is
    ``chess.square_file(square)``.

    Only piece placement is encoded; the function does not read the side to
    move or any other board state.
    """
    planes = np.zeros((12, 8, 8), dtype=np.float32)
    for sq, pc in board.piece_map().items():
        # presumably python-chess piece types run 1..6, giving planes 0..5
        # within each colour block -- TODO confirm against the library.
        plane = (pc.piece_type - 1) + (6 if pc.color != chess.WHITE else 0)
        planes[plane, 7 - chess.square_rank(sq), chess.square_file(sq)] = 1.0
    return torch.tensor(planes)


def generate_move_vocab():
    """Build the fixed UCI move vocabulary used as the policy's output space.

    The vocabulary holds every from-square/to-square pair with distinct
    squares, plus the four promotion-suffixed variants (q, r, b, n) of every
    pair going from rank 7 to rank 8 or from rank 2 to rank 1. No chess
    legality is checked, so the set is a superset of reachable moves.

    Returns:
        A tuple ``(moves, move_to_idx, idx_to_move)`` where ``moves`` is the
        lexicographically sorted list of UCI strings and the two dicts map
        between a string and its position in that list.
    """
    squares = [file_ + rank for file_ in "abcdefgh" for rank in "12345678"]
    promotion_ranks = {("7", "8"), ("2", "1")}

    vocab = set()
    for src in squares:
        for dst in squares:
            if src == dst:
                continue
            uci = src + dst
            vocab.add(uci)
            # Index 1 of a square name is its rank character.
            if (src[1], dst[1]) in promotion_ranks:
                vocab.update(uci + piece for piece in "qrbn")

    moves = sorted(vocab)
    move_to_idx = {uci: position for position, uci in enumerate(moves)}
    idx_to_move = dict(enumerate(moves))
    return moves, move_to_idx, idx_to_move


# Build the move vocabulary once at module level; `move_to_idx` and
# `idx_to_move` are read as globals by the self-play and prediction helpers.
all_moves, move_to_idx, idx_to_move = generate_move_vocab()
print("Move vocab size:", len(all_moves))


## 📦 SECTION 4 — DATASET CLASS (PGN)

In [None]:
class ChessDataset(Dataset):
    """(position, played-move) pairs extracted from the games in a PGN file.

    Every mainline ply of every loaded game contributes one sample: the board
    encoding *before* the move (via ``board_to_tensor``) and the vocabulary
    index of the move that was played.

    Plies whose UCI string is not in ``move_to_idx`` are skipped instead of
    being labelled with index 0: index 0 is itself a real move in the
    vocabulary, so defaulting to it would inject wrong training targets. The
    move is still pushed onto the board so later positions stay correct.

    Args:
        pgn_file: Path of the PGN file to read (decoded as UTF-8, undecodable
            bytes ignored).
        move_to_idx: Mapping from UCI move string to class index.
        max_games: Stop after this many games; ``None`` reads the whole file.
    """

    def __init__(self, pgn_file: str, move_to_idx: dict, max_games: int | None = 1000):
        self.positions = []
        self.moves = []

        loaded_games = 0
        skipped_moves = 0
        with open(pgn_file, "r", encoding="utf-8", errors="ignore") as f:
            while True:
                game = chess.pgn.read_game(f)
                if game is None:
                    # read_game signals end of file with None.
                    break

                board = game.board()
                for move in game.mainline_moves():
                    label = move_to_idx.get(move.uci())
                    if label is None:
                        skipped_moves += 1
                    else:
                        self.positions.append(board_to_tensor(board))
                        self.moves.append(label)
                    board.push(move)

                loaded_games += 1
                if max_games is not None and loaded_games >= max_games:
                    break

        self.moves = torch.tensor(self.moves, dtype=torch.long)
        print(
            f"Loaded {loaded_games} games, {len(self.positions)} positions"
            f" ({skipped_moves} out-of-vocabulary moves skipped)"
        )

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.positions)

    def __getitem__(self, idx):
        """Return the ``(board_tensor, move_index)`` pair at position *idx*."""
        return self.positions[idx], self.moves[idx]


# Parse at most 500 games from the downloaded PGN and serve them in shuffled
# mini-batches of 32 samples.
dataset = ChessDataset(pgn_path, move_to_idx, max_games=500)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


## 🧠 SECTION 5 — MODEL ARCHITECTURE

In [None]:
class PolicyNetwork(nn.Module):
    """CNN + Transformer encoder that scores every move in the vocabulary.

    A two-layer convolutional stack over the 12 input planes is flattened and
    projected to a 512-dimensional vector. That vector is passed through a
    two-layer Transformer encoder as a sequence of length one, and a final
    linear layer produces one logit per vocabulary entry.

    Args:
        move_vocab_size: Number of output logits.
    """

    def __init__(self, move_vocab_size: int):
        super().__init__()
        conv_stack = [
            nn.Conv2d(in_channels=12, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),
        ]
        self.cnn = nn.Sequential(*conv_stack)
        # padding=1 with a 3x3 kernel keeps the 8x8 grid, hence 128 * 8 * 8.
        self.fc = nn.Linear(128 * 8 * 8, 512)

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=512, nhead=8, batch_first=True),
            num_layers=2,
        )

        self.policy_head = nn.Linear(512, move_vocab_size)

    def forward(self, x):
        """Return move logits of shape (batch, move_vocab_size) for boards *x*."""
        features = self.fc(self.cnn(x))
        # Insert a sequence axis of size 1 for the encoder, then drop it again.
        encoded = self.transformer(features.unsqueeze(1))
        return self.policy_head(encoded.squeeze(1))


class ValueNetwork(nn.Module):
    """Small CNN that maps a 12-plane board encoding to one scalar per board.

    One 3x3 convolution is followed by two linear layers; the final ``Tanh``
    bounds the output to the interval [-1, 1].
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Conv2d(in_channels=12, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            # padding=1 keeps the 8x8 grid, so the flattened size is 64 * 8 * 8.
            nn.Linear(64 * 8 * 8, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return a (batch, 1) tensor of evaluations for boards *x*."""
        return self.model(x)


## 🎯 SECTION 6 — IMITATION LEARNING

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
policy_net = PolicyNetwork(len(move_to_idx)).to(device)
value_net = ValueNetwork().to(device)

# The optimizer covers only the policy network's parameters; `value_net` is
# instantiated above but is not optimised by anything in this cell.
optimizer = optim.Adam(policy_net.parameters(), lr=1e-3)
# Cross-entropy between the policy logits and the index of the played move.
criterion = nn.CrossEntropyLoss()


def train_policy(dataloader, epochs=2):
    """Train the global ``policy_net`` to imitate the moves in *dataloader*.

    Uses the module-level ``optimizer``, ``criterion`` and ``device``. After
    each epoch the mean loss per batch is printed; reporting the mean rather
    than the running sum keeps the figure comparable across dataset sizes and
    batch counts.

    Args:
        dataloader: Iterable yielding ``(boards, moves)`` batches.
        epochs: Number of full passes over *dataloader*.
    """
    policy_net.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for boards, moves in tqdm(dataloader, desc=f"Epoch {epoch+1}"):
            boards = boards.to(device)
            moves = moves.to(device)

            logits = policy_net(boards)
            loss = criterion(logits, moves)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # An empty dataloader yields no batches; report NaN instead of dividing by zero.
        avg_loss = total_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}/{epochs} | avg_loss={avg_loss:.4f}")


# Run two epochs of imitation learning on the PGN-derived samples.
train_policy(dataloader, epochs=2)


## 🔥 SECTION 7 — SELF-PLAY REINFORCEMENT (SIMPLIFIED)

In [None]:
def select_legal_move_from_logits(board: chess.Board, logits: torch.Tensor):
    """Sample a legal move for *board* in proportion to the policy's logits.

    Only row 0 of *logits* is read. The logits of the legal moves that exist
    in the global ``move_to_idx`` vocabulary are softmaxed among themselves
    and one move is drawn from that distribution. If no legal move is in the
    vocabulary, a uniformly random legal move is returned instead, paired
    with its vocabulary index or 0 when it has none.

    Returns:
        A tuple ``(move, vocabulary_index)``.
    """
    legal = list(board.legal_moves)

    legal_indices = []
    for candidate in legal:
        uci = candidate.uci()
        if uci in move_to_idx:
            legal_indices.append(move_to_idx[uci])

    if legal_indices:
        # Restrict the distribution to legal moves before sampling.
        probs = torch.softmax(logits[0, legal_indices], dim=0)
        pick = torch.multinomial(probs, 1).item()
        selected_idx = legal_indices[pick]
        return chess.Move.from_uci(idx_to_move[selected_idx]), selected_idx

    move = random.choice(legal)
    return move, move_to_idx.get(move.uci(), 0)


def self_play_game(max_plies=200):
    """Play one game in which the global ``policy_net`` moves for both sides.

    The game starts from a fresh ``chess.Board()`` and ends when the board
    reports game over or *max_plies* moves have been recorded. The network is
    switched to eval mode and is left in that mode afterwards; no weights are
    updated here.

    Returns:
        A tuple ``(history, reward, result)``: ``history`` is a list of
        ``(state_tensor, move_index)`` pairs, ``result`` is the string from
        ``board.result()``, and ``reward`` is 1 for ``"1-0"``, -1 for
        ``"0-1"`` and 0 for any other result string.
    """
    board = chess.Board()
    history = []
    policy_net.eval()

    while len(history) < max_plies and not board.is_game_over():
        state = board_to_tensor(board).unsqueeze(0).to(device)
        with torch.no_grad():
            logits = policy_net(state)
        move, move_idx = select_legal_move_from_logits(board, logits)
        history.append((state, move_idx))
        board.push(move)

    result = board.result()
    # The reward is expressed from White's point of view.
    reward = {"1-0": 1, "0-1": -1}.get(result, 0)
    return history, reward, result


# Smoke test: play three self-play games with the current policy and print
# each result string and reward. The returned move history is discarded.
for i in range(3):
    _, reward, result = self_play_game()
    print(f"Self-play game {i+1}: {result}, reward={reward}")


## 🎮 SECTION 8 — PLAY AGAINST ENGINE

In [None]:
def predict_move(board: chess.Board) -> chess.Move:
    """Return the legal move to which the global ``policy_net`` gives the highest logit.

    Legal moves absent from the global ``move_to_idx`` vocabulary are ignored
    when ranking. If none of the legal moves is in the vocabulary, a uniformly
    random legal move is returned. The network is switched to eval mode.
    """
    policy_net.eval()
    state = board_to_tensor(board).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = policy_net(state)

    legal = list(board.legal_moves)
    candidates = []
    for option in legal:
        uci = option.uci()
        if uci in move_to_idx:
            candidates.append(move_to_idx[uci])

    if not candidates:
        return random.choice(legal)

    # Greedy choice: argmax over the logits of the legal moves only.
    best_position = torch.argmax(logits[0, candidates]).item()
    return chess.Move.from_uci(idx_to_move[candidates[best_position]])


# Ask the trained policy for its preferred move on a fresh default board.
board = chess.Board()
print("Engine move from start:", predict_move(board).uci())


## 📊 SECTION 9 — EVALUATION

- Top-1 and Top-5 move prediction on held-out Chess.com games.
- Win rate vs random legal-move baseline.
- Optional Elo tracking by checkpoint.