# PGN to Tensor

In [None]:
import chess
import chess.pgn
import numpy as np

In [None]:
def create_board_tensor(board):
    """Encode the pieces on ``board`` as an 8x8x12 one-hot ``float32`` array.

    Layout of the returned array:
      * axis 0 -- row, computed as ``7 - chess.square_rank(square)`` so the
        highest rank ends up in row 0;
      * axis 1 -- column, taken directly from ``chess.square_file(square)``;
      * axis 2 -- piece channel: white P, N, B, R, Q, K occupy channels 0-5
        and black p, n, b, r, q, k occupy channels 6-11.

    A cell is 1 when the corresponding piece stands on that square and 0
    otherwise. ``board`` only needs to provide ``piece_at(square)`` returning
    either ``None`` or an object with a ``symbol()`` method.
    """
    # Channel number of each piece symbol (upper case = white, lower = black).
    channel_of = {symbol: idx for idx, symbol in enumerate('PNBRQKpnbrqk')}

    planes = np.zeros((8, 8, 12), dtype=np.float32)

    for sq in chess.SQUARES:
        occupant = board.piece_at(sq)
        if occupant is None:
            continue  # empty square: leave all twelve channels at zero

        row = 7 - chess.square_rank(sq)  # flip so the top rank is row 0
        col = chess.square_file(sq)
        planes[row, col, channel_of[occupant.symbol()]] = 1

    return planes

In [None]:
def extract_eval(comment):
    """Parse the engine evaluation out of a PGN move comment.

    Looks for the first ``[%eval ...]`` annotation in ``comment`` and converts
    it to a number.

    Args:
        comment: The raw comment text of a move (may be empty or ``None``).

    Returns:
        * ``float`` score as written in the annotation (e.g. ``0.17``);
        * ``100.0`` / ``-100.0`` for mate annotations (``#N``): positive when
          ``N > 0``, negative otherwise;
        * ``None`` when there is no annotation, it is not closed by ``]``,
          or its value cannot be parsed.

    An optional ``,depth`` suffix after the score (``[%eval 0.5,20]``) is
    ignored.
    """
    marker = '[%eval'
    if not comment or marker not in comment:
        return None

    # Extract everything between the marker and the closing bracket.
    start = comment.index(marker) + len(marker)
    end = comment.find(']', start)
    if end == -1:
        return None  # unterminated annotation

    # Drop the optional ",depth" part; only the score itself is of interest.
    score_str = comment[start:end].partition(',')[0].strip()

    try:
        # Mate scores are clamped to a fixed magnitude so that they remain
        # comparable with ordinary pawn-unit evaluations.
        if score_str.startswith('#'):
            return 100.0 if int(score_str[1:]) > 0 else -100.0

        # Regular scores
        return float(score_str)
    except ValueError:
        # Malformed number: treat the position as having no evaluation.
        # Only ValueError is caught so unrelated failures (and Ctrl-C) are
        # not silently swallowed.
        return None

In [None]:
def process_pgn(pgn_file, max_positions):
    """Collect unique evaluated positions from the main lines of a PGN file.

    Every game in ``pgn_file`` is replayed along its main line (first
    variation at each node). After each move, the move's comment is passed to
    ``extract_eval``; when an evaluation is found and the resulting position
    has not been seen before, the position is stored.

    Args:
        pgn_file: Path of the PGN file to read.
        max_positions: Maximum number of unique positions to collect. Reading
            stops as soon as this many have been stored. A value of zero or
            less yields an empty result.

    Returns:
        dict mapping a FEN string without the half-move and full-move
        counters to a ``(tensor, eval_score)`` tuple, where ``tensor`` comes
        from ``create_board_tensor`` and ``eval_score`` from ``extract_eval``.
        The first evaluation seen for a position wins.
    """
    positions = {}  # FEN (without move counters) -> (tensor, eval_score)

    # Without this guard one position would still be stored, because the
    # limit is only checked after an insertion.
    if max_positions <= 0:
        return positions

    # Decode explicitly as UTF-8 instead of the platform's locale encoding so
    # the same file parses identically everywhere. Moves and eval annotations
    # are plain ASCII, so undecodable bytes elsewhere (e.g. player names in a
    # legacy encoding) are replaced rather than aborting the whole run.
    with open(pgn_file, encoding="utf-8", errors="replace") as f:
        while True:
            game = chess.pgn.read_game(f)
            if game is None:
                break  # end of file

            board = game.board()
            node = game

            # Walk the main line, keeping ``board`` in sync with ``node``.
            while node.variations:
                node = node.variation(0)
                board.push(node.move)

                eval_score = extract_eval(node.comment)
                if eval_score is None:
                    continue

                # Strip the two trailing move counters so the same position
                # reached at different move numbers is counted once.
                fen = board.fen().rsplit(' ', 2)[0]
                if fen in positions:
                    continue

                positions[fen] = (create_board_tensor(board), eval_score)

                # Stop if max_positions is reached
                if len(positions) >= max_positions:
                    print(f"Reached max positions limit: {max_positions}")
                    return positions

    return positions

## Run the code

In [None]:
# Usage: parse the PGN file, turn the collected positions into NumPy arrays
# and store them on disk.
import os

pgn_file = "input.pgn"
max_positions = 50000  # Limit the number of board states stored
positions = process_pgn(pgn_file, max_positions)

# Convert to training data. Both arrays are built from the same dict, so
# X[i] and y[i] always belong to the same position.
# The reshape keeps X four-dimensional even when no positions were found.
X = np.array(
    [tensor for tensor, _ in positions.values()], dtype=np.float32
).reshape(-1, 8, 8, 12)  # Tensors
y = np.array([eval_score for _, eval_score in positions.values()])  # Evaluations

# Now X and y are ready for machine learning
# X.shape is (n_positions, 8, 8, 12)
# y.shape is (n_positions,)

print(f"Total unique positions: {len(positions)}")
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Save. np.save does not create missing directories, so make sure the output
# folder exists first.
os.makedirs('processed_data', exist_ok=True)
np.save('processed_data/board_states.npy', X)
np.save('processed_data/evaluations.npy', y)

Reached max positions limit: 50000
Total unique positions: 50000
X shape: (50000, 8, 8, 12)
y shape: (50000,)
