In [1]:
import chess
import chess.pgn
import os
import re
import numpy as np
from tqdm import tqdm

In [2]:
# Move the working directory up one level.
# NOTE: this is not idempotent; re-running the cell climbs one more directory
# each time, which changes what os.getcwd() returns afterwards.
os.chdir('..')

In [3]:
def load_pgns(file_path, num_games=None, start_index=0, encoding="utf-8"):
    """Read games from a PGN file into a list.

    Args:
        file_path: Path of the PGN file to read.
        num_games: Maximum number of games to load. ``None`` (the default)
            loads every remaining game; ``0`` loads none.
        start_index: Number of games to skip from the start of the file
            before loading begins.
        encoding: Text encoding used to open the file.

    Returns:
        A list with the objects returned by ``chess.pgn.read_game``, in file
        order. The list is shorter than ``num_games`` when the file runs out
        of games first.
    """
    games = []
    with open(file_path, "r", encoding=encoding) as file:
        for _ in tqdm(range(start_index), desc='Skipping games', unit='game', leave=False):
            # skip_game moves past one game without building its move tree
            # and reports False once the end of the file is reached.
            if not chess.pgn.skip_game(file):
                break

        if num_games is None:
            # Two-argument iter(): int() always yields 0, never the sentinel 1,
            # so this is an endless iterator; the loop ends at end of file.
            slots = iter(int, 1)
        else:
            # Compare against None explicitly so that num_games=0 means
            # "load nothing" rather than "load everything".
            slots = tqdm(range(num_games), desc='Loading games', unit='game', leave=True)

        for _ in slots:
            game = chess.pgn.read_game(file)
            if game is None:
                break
            games.append(game)
    return games

In [4]:
# Locate the sample PGN inside the 'asset' folder of the current working directory.
assets_path = os.path.join(os.getcwd(), 'asset')
single_path = os.path.join(assets_path, 'fabdub.pgn')
# Load at most one game from the file and keep it as the working example.
games = load_pgns(single_path, 1)
game = games[0]  # IndexError here means load_pgns returned no games
# Board object obtained from the game; moves are pushed onto it in a later cell.
board = game.board()

Loading games: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 142.90game/s]


In [5]:
def extract_eval_clk_from_pgn(input):
    """Pull the ``%eval`` and ``%clk`` annotations out of a PGN comment.

    Args:
        input: Comment text to search.

    Returns:
        A ``(eval, clk)`` tuple of strings holding the first match of each
        annotation. An entry is ``None`` when its annotation is not found
        (the eval pattern only accepts digits, ``.`` and ``-``; the clock
        pattern only digits and ``:``).
    """
    def first_capture(pattern):
        # Text of the first capture group, or None when nothing matches.
        found = re.search(pattern, input)
        if found is None:
            return None
        return found.group(1)

    eval_text = first_capture(r"\[%eval\s+([0-9.-]+)\]")
    clk_text = first_capture(r"\[%clk\s+([0-9:]+)\]")
    return eval_text, clk_text

def eval_to_cp(eval):
    """Scale an evaluation by 100 (e.g. pawns to centipawns).

    Args:
        eval: Evaluation as a number or numeric string; anything ``float()``
            accepts.

    Returns:
        The evaluation multiplied by 100, as a float.
    """
    return float(eval) * 100

def clk_to_time(clk):
    """Convert an ``H:M:S`` clock string into a number of seconds.

    Args:
        clk: Clock text with exactly three colon-separated integer fields.

    Returns:
        Total seconds as an int.

    Raises:
        ValueError: If there are not exactly three fields or a field is not
            an integer.
    """
    hours, minutes, seconds = clk.split(":")
    weighted_fields = ((hours, 3600), (minutes, 60), (seconds, 1))
    return sum(int(field) * weight for field, weight in weighted_fields)

def eval_to_game_state(value, cuts = None):
    """Bucket an evaluation into a coarse game-state score.

    ``cuts`` is a descending sequence of thresholds. Consecutive thresholds
    form half-open bins ``(lower, upper]``. The first bin scores 1.0 and each
    following bin scores 0.1 less, so the default 11 bins span 1.0 down to 0.0.

    Args:
        value: Evaluation to classify (the default cuts are in the same
            units as the output of ``eval_to_cp``).
        cuts: Optional descending sequence (list, tuple or NumPy array) of
            thresholds. Defaults to
            ``[inf, 375, 250, 150, 75, 25, -25, -75, -150, -250, -375, -inf]``.

    Returns:
        ``round(1 - i / 10, 2)`` for the index ``i`` of the first bin that
        contains ``value``, or ``None`` when no bin does (e.g. NaN, or a
        value equal to the last threshold).
    """
    # Identity test: `cuts == None` is evaluated elementwise for NumPy arrays
    # and makes the `if` raise an ambiguous-truth ValueError.
    if cuts is None:
        cuts = [np.inf, 375, 250, 150, 75, 25, -25, -75, -150, -250, -375, -np.inf]
    for i, (upper, lower) in enumerate(zip(cuts, cuts[1:])):
        if upper >= value > lower:
            return round(1-i/10, 2)
    return None

def fen_to_array(fen):
    """Encode the piece placement of a FEN string as a flat 64-element array.

    Only the first whitespace-separated field (piece placement) is used, so
    both full six-field FENs and board-only strings are accepted.

    Index layout is ``rank * 8 + file`` with rank 0 being the last rank
    listed in the FEN, so index 0 is a1 and index 63 is h8. White pieces are
    positive (P=1, N=2, B=3, R=4, Q=5, K=6), black pieces are the matching
    negatives and empty squares are 0.

    Args:
        fen: FEN string; fields after the piece placement are ignored.

    Returns:
        A NumPy array of 64 integers.

    Raises:
        ValueError: If ``fen`` is empty or whitespace only.
        KeyError: If the placement field contains an unknown piece letter.
    """
    piece_mapping = {'p': -1, 'n': -2, 'b': -3, 'r': -4, 'q': -5, 'k': -6,
                     'P': 1, 'N': 2, 'B': 3, 'R': 4, 'Q': 5, 'K': 6}
    fields = fen.split()
    if not fields:
        raise ValueError("FEN string is empty")
    board_fen = fields[0]

    board_array = [0] * 64
    # FEN lists ranks from the top (8th) down to the 1st, files a to h.
    rank = 7
    file = 0
    for char in board_fen:
        if char.isdigit():
            # A digit is a run of that many empty squares.
            file += int(char)
        elif char == '/':
            rank -= 1
            file = 0
        else:
            index = rank * 8 + file
            board_array[index] = piece_mapping[char]
            file += 1
    return np.array(board_array)

In [6]:
# Replay the main line, printing for every move its long algebraic notation,
# the bucketed evaluation, the clock in seconds and the encoded position.
moves = [move for move in game.mainline_moves()]
board = game.board()
node = game
for move in moves:
    node = node.next()
    # NOTE: `eval` shadows the builtin at notebook scope; the name is kept
    # because other cells may read it.
    eval, clk = extract_eval_clk_from_pgn(node.comment)
    # Either annotation can be missing (extract_eval_clk_from_pgn returns None
    # for it, e.g. for mate scores such as "#3"); keep None instead of crashing
    # inside the converters.
    eval = eval_to_game_state(eval_to_cp(eval)) if eval is not None else None
    clk = clk_to_time(clk) if clk is not None else None
    # Notation and position are taken before the move is pushed, so the
    # printed array is the position the move was played from.
    lan = board.lan(move)
    fen = fen_to_array(board.fen())
    board.push(move)
    print(lan, eval, clk)
    print(fen)

e2-e4 0.5 182
[ 4  2  3  5  6  3  2  4  1  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
 -1 -1 -1 -1 -1 -1 -1 -1 -4 -2 -3 -5 -6 -3 -2 -4]
e7-e5 0.5 183
[ 4  2  3  5  6  3  2  4  1  1  1  1  0  1  1  1  0  0  0  0  0  0  0  0
  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
 -1 -1 -1 -1 -1 -1 -1 -1 -4 -2 -3 -5 -6 -3 -2 -4]
Ng1-f3 0.6 183
[ 4  2  3  5  6  3  2  4  1  1  1  1  0  1  1  1  0  0  0  0  0  0  0  0
  0  0  0  0  1  0  0  0  0  0  0  0 -1  0  0  0  0  0  0  0  0  0  0  0
 -1 -1 -1 -1  0 -1 -1 -1 -4 -2 -3 -5 -6 -3 -2 -4]
Nb8-c6 0.6 185
[ 4  2  3  5  6  3  0  4  1  1  1  1  0  1  1  1  0  0  0  0  0  2  0  0
  0  0  0  0  1  0  0  0  0  0  0  0 -1  0  0  0  0  0  0  0  0  0  0  0
 -1 -1 -1 -1  0 -1 -1 -1 -4 -2 -3 -5 -6 -3 -2 -4]
Bf1-c4 0.5 183
[ 4  2  3  5  6  3  0  4  1  1  1  1  0  1  1  1  0  0  0  0  0  2  0  0
  0  0  0  0  1  0  0  0  0  0  0  0 -1  0  0  0  0  0 -2  0  0  0  0 