In [1]:
import chess
import chess.pgn
import glob
import numpy as np

def board_to_tensor(board: chess.Board) -> np.ndarray:
    """Encode the piece placement of ``board`` as a (12, 8, 8) uint8 array.

    Planes 0-5 mark White's pawn, knight, bishop, rook, queen and king;
    planes 6-11 mark Black's pieces in the same order. A cell is 1 where
    a piece of that kind stands and 0 elsewhere. Rows are indexed as
    ``7 - rank`` so the highest rank ends up in row 0; columns follow the
    file index. Only what ``board.piece_map()`` reports is encoded.
    """
    piece_order = (
        chess.PAWN,
        chess.KNIGHT,
        chess.BISHOP,
        chess.ROOK,
        chess.QUEEN,
        chess.KING,
    )
    # (piece type, colour) -> plane number; White starts at 0, Black at 6.
    plane_of = {
        (kind, colour): offset + position
        for offset, colour in ((0, chess.WHITE), (6, chess.BLACK))
        for position, kind in enumerate(piece_order)
    }

    planes = np.zeros((12, 8, 8), dtype=np.uint8)
    for sq, occupant in board.piece_map().items():
        plane = plane_of[(occupant.piece_type, occupant.color)]
        # Flip the rank so the array reads like a diagram seen from White's side.
        planes[plane, 7 - chess.square_rank(sq), chess.square_file(sq)] = 1

    return planes

def move_to_tensor(move: chess.Move) -> np.ndarray:
    """Encode ``move`` as a (2, 8, 8) uint8 array of one-hot square masks.

    Plane 0 marks ``move.from_square`` and plane 1 marks ``move.to_square``.
    Rows are indexed as ``7 - rank`` and columns by file index, the same
    layout ``board_to_tensor`` uses.
    """
    masks = np.zeros((2, 8, 8), dtype=np.uint8)
    for plane, sq in enumerate((move.from_square, move.to_square)):
        masks[plane, 7 - chess.square_rank(sq), chess.square_file(sq)] = 1
    return masks

In [2]:
from stockfish import Stockfish
stockfish = Stockfish(path="./stockfish-ubuntu-x86-64-avx2")

path = 'data/5_less.pgn'

# Number of plies to rewind from the final position before sampling.
PLIES_BACK = 5
# Centipawn score above which the sampled position counts as winning for White.
WINNING_CP = 300

num_games = 0
endplay_games = 0

inputs = []
outputs = []


def _pick_sample_node(game):
    """Return the game node whose ``move`` is the White move to sample, or None.

    Starts at ``game.end()``, walks ``PLIES_BACK`` parents up the mainline and,
    if it is White's turn in the position reached, steps back one more ply so
    that the node's own move was played by White. Returns None when the game
    is too short for that walk or when the selected node is the root (which
    has no move and no parent position to use as input).
    """
    node = game.end()
    for _ in range(PLIES_BACK):
        if node.parent is None:
            return None
        node = node.parent
    if node.board().turn == chess.WHITE:
        node = node.parent
    if node is None or node.parent is None:
        return None
    return node


for filepath in glob.glob(path):
    with open(filepath, encoding="utf-8", errors="ignore") as file:
        while True:
            game = chess.pgn.read_game(file)
            if game is None:
                break
            num_games += 1
            if num_games % 500 == 0:
                print(num_games)

            # Just look at White wins for now, as convention.
            if game.headers.get("Result") != "1-0":
                continue

            node = _pick_sample_node(game)
            if node is None:
                # Too few plies to rewind; nothing to sample from this game.
                continue

            # Evaluate the position reached after the sampled move, so a sample
            # is only kept when White is still winning once the move is played.
            stockfish.set_fen_position(node.board().fen())
            evaluation = stockfish.get_evaluation()
            score_type = evaluation.get('type')
            score_value = evaluation.get('value', 0)
            white_is_winning = (
                (score_type == 'cp' and score_value > WINNING_CP)
                or (score_type == 'mate' and score_value > 0)
            )
            if not white_is_winning:
                continue

            # The input is the position the move was played FROM (White to
            # move); node.board() would already contain the move in the label.
            x = board_to_tensor(node.parent.board())
            y = move_to_tensor(node.move)
            inputs.append(x)
            outputs.append(y)
            endplay_games += 1

print(num_games, endplay_games)

500


no matching legal move for e2e4 (e2 -> e4) in 6k1/4P3/r3R3/4K3/8/8/8/8 b - - 10 64 while parsing <Game at 0x767267a66d70 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'f1' in 8/8/8/8/5b2/8/4Kpk1/5N2 w - - 3 77 while parsing <Game at 0x767267ab8b20 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'ga1' in 8/8/1P6/3k3p/8/4B3/7K/8 w - - 5 63 while parsing <Game at 0x76725eb026b0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'g1' in 8/8/8/p5P1/1Rk4K/8/8/8 b - - 0 62 while parsing <Game at 0x767267ab8fa0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/3K4/5P1p/7k/8/3B4/8/8 b - - 0 66 while parsing <Game at 0x76725f5a02e0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'h2' in 8/7K/4k3/6P1/6r1/8/8/8 w - - 0 85 while parsing <Game at 0x76725eb4e020 ('?' vs. '?', '????.??.??' at '?')>


1000


no matching legal move for e2e4 (e2 -> e4) in 8/K2k4/1P6/8/8/8/8/8 b - - 2 60 while parsing <Game at 0x76725eb4da20 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'g1' in 8/6p1/8/5k1p/7P/6K1/8/8 w - - 0 57 while parsing <Game at 0x76725ff32680 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'g1' in 8/2k5/4K3/1pP5/4B3/8/8/8 b - - 0 60 while parsing <Game at 0x76725f593370 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'ch5' in 8/8/8/8/8/5pk1/6N1/1r4K1 w - - 4 53 while parsing <Game at 0x76725ffbd030 ('?' vs. '?', '????.??.??' at '?')>


1500
2000


illegal san: 'Be1' in 5k2/3b4/5K2/8/8/8/1R6/8 b - - 2 73 while parsing <Game at 0x76725f57c040 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'a1' in 8/8/8/8/8/2n1K3/4N1p1/7k w - - 12 93 while parsing <Game at 0x76725eb4ee30 ('?' vs. '?', '????.??.??' at '?')>


2500


illegal san: 'h2' in 8/r3Q2k/5P2/4K3/8/8/8/8 b - - 2 89 while parsing <Game at 0x76725ffbcb50 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/8/8/3r4/8/4R3/2K1pk2/8 w - - 8 106 while parsing <Game at 0x76726438d1e0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/8/8/8/8/3Kp1k1/2n5/8 w - - 2 69 while parsing <Game at 0x767267ac6e60 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/8/4K3/8/4k1p1/8/7p/5R2 w - - 2 57 while parsing <Game at 0x76725f59d150 ('?' vs. '?', '????.??.??' at '?')>


3000


illegal san: 'a2' in 7R/8/8/8/8/5BK1/8/r4k2 b - - 96 130 while parsing <Game at 0x76726438c1f0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/6P1/p2k3K/8/8/8/8/2q5 w - - 0 54 while parsing <Game at 0x76725ebd0370 ('?' vs. '?', '????.??.??' at '?')>


3500


illegal san: 'e1' in r7/3RK1P1/2k5/8/8/8/8/8 b - - 2 80 while parsing <Game at 0x767267ae3730 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/2P5/4q3/K7/4k3/8/8/4Q3 b - - 24 86 while parsing <Game at 0x767267ac6e00 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'ba2' in 8/8/8/8/kr6/4K3/p7/3R4 w - - 2 65 while parsing <Game at 0x76725f521db0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'f1' in 4k3/5p2/2K2P2/4P3/8/8/8/8 b - - 2 78 while parsing <Game at 0x76725eb151e0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/3k2P1/5K2/5R2/r7/8/8/8 b - - 0 75 while parsing <Game at 0x76725f5eb880 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 5R2/8/8/4r3/8/3K2k1/6p1/8 w - - 8 73 while parsing <Game at 0x76725f56d0f0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 7k/8/4R1KP/8/8/8/8/8 b - - 0 99 while parsing <Game at 0x76726438d000 ('?' vs. '?', '????.??

4000


illegal san: 'a1' in 5R2/8/8/6K1/1r6/4k3/4p3/8 w - - 2 59 while parsing <Game at 0x767267b20400 ('?' vs. '?', '????.??.??' at '?')>


4500


no matching legal move for e2e4 (e2 -> e4) in 8/P3k3/1K6/1R6/8/8/8/r7 b - - 2 98 while parsing <Game at 0x7672643ceaa0 ('?' vs. '?', '????.??.??' at '?')>


5000


illegal san: 'Ba1' in 8/8/8/6r1/8/3bk3/8/2R1K3 w - - 14 101 while parsing <Game at 0x76725fff58d0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/8/8/8/8/5bkn/8/7K w - - 90 99 while parsing <Game at 0x767267a52da0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'a2' in 8/6K1/8/4k3/7P/8/8/8 b - - 0 77 while parsing <Game at 0x76725ebd3220 ('?' vs. '?', '????.??.??' at '?')>


5500


no matching legal move for e2e4 (e2 -> e4) in 8/8/R7/kPK5/8/8/8/5b2 b - - 0 89 while parsing <Game at 0x76725f561cc0 ('?' vs. '?', '????.??.??' at '?')>


6000


illegal san: 'ha1' in 8/1p6/1P6/8/5K2/8/5k2/8 b - - 0 62 while parsing <Game at 0x76726438d000 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'e1' in 8/8/R7/5r2/8/8/3p2K1/3k4 w - - 4 62 while parsing <Game at 0x76725ebfc8e0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/8/PN2k3/1K6/8/8/8/6b1 b - - 3 89 while parsing <Game at 0x76725ffbff40 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'a1' in 5r2/4R3/2k5/4K3/5P2/8/8/8 b - - 12 66 while parsing <Game at 0x767267b13d30 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 1K6/1P6/8/1k6/8/8/R7/6r1 b - - 4 77 while parsing <Game at 0x76725eb2ace0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 4B3/8/8/6r1/8/4kp2/8/5K2 w - - 0 78 while parsing <Game at 0x76725eb538b0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'ha1' in 8/6K1/2k5/6p1/6P1/8/8/8 b - - 1 66 while parsing <Game at 0x7672641bd420 ('?' vs. '?', '????.??.??' at '?')>
illegal

6500
7000


illegal san: 'ha1' in 8/8/8/6p1/8/6k1/8/7K w - - 0 75 while parsing <Game at 0x767267a7c520 ('?' vs. '?', '????.??.??' at '?')>


7500


no matching legal move for e2e4 (e2 -> e4) in 2K5/1P6/2q5/8/8/8/6k1/8 w - - 10 92 while parsing <Game at 0x76725f5ad7b0 ('?' vs. '?', '????.??.??' at '?')>


8000


no matching legal move for e2e4 (e2 -> e4) in 8/P1K1k3/1R6/6r1/8/8/8/8 b - - 0 71 while parsing <Game at 0x767267b2a0e0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'e1' in 8/2k1K3/1p6/1P6/2P5/8/8/8 b - - 0 58 while parsing <Game at 0x76725eb41e40 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/8/7b/8/8/1k6/p1NK4/8 w - - 2 90 while parsing <Game at 0x767267a79030 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/P2k4/K5R1/8/8/1r6/8/8 b - - 2 61 while parsing <Game at 0x767267ab1ae0 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/8/8/8/8/2n5/1bk5/K7 w - - 35 106 while parsing <Game at 0x76725f56df60 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/8/K7/8/7p/6n1/6k1/6B1 w - - 1 72 while parsing <Game at 0x76725f5ea230 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 8/5k2/7P/p4K2/P7/8/8/8 b - - 2 91 while parsing

8500
9000


illegal san: 'a1' in 2R5/8/8/8/8/2K2r2/2p5/2k5 w - - 4 57 while parsing <Game at 0x767267b22590 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'e1' in 8/8/8/5K1k/7p/5p1P/8/8 w - - 0 68 while parsing <Game at 0x76725ff468f0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'h2' in 8/1k4P1/5K2/8/5R2/8/8/6r1 b - - 4 66 while parsing <Game at 0x76725ff482e0 ('?' vs. '?', '????.??.??' at '?')>


9500


illegal san: 'Be1' in Q7/8/8/4p3/8/4K3/8/6k1 b - - 1 78 while parsing <Game at 0x76725e177ee0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'Ba1' in 8/7r/1R6/8/8/5K2/6p1/6k1 b - - 3 74 while parsing <Game at 0x76725ebf4b80 ('?' vs. '?', '????.??.??' at '?')>


10000


no matching legal move for e2e4 (e2 -> e4) in 4R3/8/8/4r3/4k3/8/4p2K/8 w - - 16 102 while parsing <Game at 0x76725eb6efe0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'e1' in 8/7p/8/1N6/4K3/8/pk6/8 w - - 4 69 while parsing <Game at 0x767267abed70 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'a1' in 2R5/1K6/8/8/4p3/5k2/8/5q2 w - - 0 54 while parsing <Game at 0x76725e182680 ('?' vs. '?', '????.??.??' at '?')>


10500
11000


illegal san: 'a1' in 8/8/1b6/8/8/4k1pp/8/7K w - - 0 70 while parsing <Game at 0x76725e1472b0 ('?' vs. '?', '????.??.??' at '?')>


11500


illegal san: 'a1' in 8/8/8/3k4/3r2P1/4RK2/8/8 b - - 0 53 while parsing <Game at 0x76725f563100 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in R7/5k1P/8/8/5K2/8/8/6r1 b - - 2 65 while parsing <Game at 0x76725ff469b0 ('?' vs. '?', '????.??.??' at '?')>
illegal san: 'e1' in 8/8/8/6pN/1K1k2B1/8/8/8 b - - 5 92 while parsing <Game at 0x76725eb28f70 ('?' vs. '?', '????.??.??' at '?')>


12000


illegal san: 'a1' in 8/8/8/n2B4/5k2/2K5/6P1/8 b - - 5 59 while parsing <Game at 0x76725ffbc040 ('?' vs. '?', '????.??.??' at '?')>
no matching legal move for e2e4 (e2 -> e4) in 5k2/5N1P/6K1/8/8/8/8/8 b - - 0 113 while parsing <Game at 0x76725f51be80 ('?' vs. '?', '????.??.??' at '?')>


12500


illegal san: 'ha1' in 8/2K5/P3R3/6k1/8/8/r7/8 b - - 6 68 while parsing <Game at 0x76725ff47ca0 ('?' vs. '?', '????.??.??' at '?')>


12904 6095


In [3]:
# Collapse the per-sample lists into two batch arrays (new leading axis),
# then write both into a single compressed .npz archive.
X, Y = (np.stack(samples) for samples in (inputs, outputs))

np.savez_compressed("data/5_less_tensor.npz", inputs=X, outputs=Y)