In [27]:
import chess
import json
import math
from os import listdir
from os.path import isfile, join
import pandas
from stockfish import Stockfish
from tqdm.notebook import tqdm

In [28]:
USERNAMES = ["magnuscarlsen", "thedarkknighttrilogy", "hikaru", "wonderfultime", "danielnaroditsky"]
USERNAME = "thedarkknighttrilogy"
FILE_PATH = f"./{USERNAME}/games"

BLUNDER_CENTIPAWN = 300
MISTAKE_CENTIPAWN = 100
INACCURACY_CENTIPAWN = 50

BLUNDER_SYMBOL = "??"
MISTAKE_SYMBOL = "?"
INACCURACY_SYMBOL = "?!"
INTERESTING_SYMBOL = "!?"
GOOD_SYMBOL = "!"
BRILLIANT_SYMBOL = "!!"

In [29]:
pgn_file_names = [f for f in listdir(FILE_PATH) if isfile(join(FILE_PATH, f))]
pgn_file_names.sort()
pgn_file_names

['2021-10.pgn',
 '2021-11.pgn',
 '2021-12.pgn',
 '2022-01.pgn',
 '2022-02.pgn',
 '2022-03.pgn',
 '2022-04.pgn',
 '2022-05.pgn',
 '2022-06.pgn',
 '2022-07.pgn',
 '2022-08.pgn',
 '2022-09.pgn',
 '2022-10.pgn',
 '2022-11.pgn',
 '2022-12.pgn',
 '2023-01.pgn',
 '2023-02.pgn',
 '2023-03.pgn',
 '2023-04.pgn',
 '2023-05.pgn',
 '2023-06.pgn',
 '2023-07.pgn']

In [30]:
games = []

for pgn_file_name in tqdm(pgn_file_names):
    pgn_file = open(f"{FILE_PATH}/{pgn_file_name}", "r")
    pgn_file_content : str = pgn_file.read()
    pgn_file.close()
    games_by_file = pgn_file_content.split("\n\n\n")
    games += games_by_file

  0%|          | 0/22 [00:00<?, ?it/s]

In [31]:
metadata = {}
metadata["total"] = len(games)
metadata_string = json.dumps(metadata, indent=2)
metadata_file = open(f"./{USERNAME}/metadata.json", "w")
metadata_file.write(metadata_string)
metadata_file.close()

In [32]:
def chunk(list : list, number : int):
    # looping till length l
    for i in range(0, len(list), number): 
        yield list[i:i + number]

In [33]:
stockfish = Stockfish()

In [34]:
def evaluate_move(loss : int):
    if loss >= BLUNDER_CENTIPAWN:
        return BLUNDER_SYMBOL
    if loss >= MISTAKE_CENTIPAWN:
        return MISTAKE_SYMBOL
    if loss >= INACCURACY_CENTIPAWN:
        return INACCURACY_SYMBOL
    return ''

In [35]:
def get_pgn_moves(pgn : str) -> list[str]:
    lines : list[str] = pgn.split("\n")
    lines : list[str] = list(filter(lambda line: line != "", lines))
    moves_string : str = lines[-1]
    moves : list[str] = moves_string.split(" ")
    moves.pop() # Remove result (1-0 or 0-1)
    moves : list[str] = list(filter(lambda move: "{" not in move and "}" not in move and "..." not in move, moves))
    moves : list[str] = [item for index, item in enumerate(moves) if index % 3 != 0]
    return moves

In [36]:
def get_fen_moves(moves : list[str]) -> list[dict]:
    fen_moves = []
    board : chess.Board = chess.Board()
    for index, move in enumerate(moves):
        move_number : int = math.floor(index / 2) + 1
        side : str = 'white' if index % 2 == 0 else 'black'
        board.push_san(move)
        fen : str = board.fen()
        fen_moves.append({
            "move_number": move_number,
            "side": side,
            "move": move,
            "fen": fen
        })
    board.reset()
    return fen_moves

In [37]:
def get_top_move(fen : str) -> dict:
    if not stockfish.is_fen_valid(fen):
        return { "centipawn": 0, "pawn": 0, "mate_in": 0, "evaluated": False }
    try:
        stockfish.set_fen_position(fen)
        top_moves : list[dict] = stockfish.get_top_moves(5)
        top_move : dict = top_moves[0]
        top_move_centipawn = top_move.get("Centipawn")
        print("get_top_move top_move_centipawn", top_move_centipawn)
        if top_move_centipawn == None:
            return { "centipawn": 0, "pawn": 0, "mate_in": 0, "evaluated": False }
        if "M" in str(top_move_centipawn):
            print("get_top_move mate_in", mate_in)
            mate_in = int(top_move_centipawn.replace("M", ""))
            return { "centipawn": 0, "pawn": 0, "mate_in": mate_in, "evaluated": True }
        top_move_pawn : float = top_move_centipawn / 100
        return { "centipawn": top_move_centipawn, "pawn": top_move_pawn, "mate_in": 0, "evaluated": True }
    except:
        print("get_top_move error")
        return { "centipawn": 0, "pawn": 0, "mate_in": 0, "evaluated": False }

In [38]:
def analyse_pgn(pgn : str):
    moves = get_pgn_moves(pgn)
    fen_moves = get_fen_moves(moves)
    game_details = [{
        "move_number": 0,
        "side": "",
        "move": "",
        "fen": "",
        "pawn": 0,
        "centipawn": 0,
        "mate_in": 0,
        "evaluation": ""
    }]
    for index, move_with_fen in enumerate(fen_moves):
        move_number : int = move_with_fen.get("move_number", 0)
        side : str = move_with_fen.get("side", "")
        move : str = move_with_fen.get("move", "")
        fen : str = move_with_fen.get("fen", "")
        top_move : dict = get_top_move(fen)
        top_move_evaluated : bool = top_move.get("evaluated", False)
        top_move_centipawn : int = top_move.get("centipawn", 0)
        top_move_pawn : float = top_move.get("pawn", 0)
        top_move_mate_in : int = top_move.get("mate_in", 0)
        print(move_number, side, move, top_move_pawn, top_move_centipawn, top_move_mate_in)
        if not top_move_evaluated:
            game_details.append({
                "move_number": move_number,
                "side": side,
                "move": move,
                "fen": fen,
                "pawn": 0,
                "centipawn": 0,
                "mate_in": 0,
                "evaluation": ""
            })
            continue
        previous : dict = game_details[index]
        previous_centipawn : int = previous.get("centipawn", 0)
        centipawn_loss : int = top_move_centipawn - previous_centipawn
        evaluation : str = evaluate_move(centipawn_loss)
        game_details.append({
            "move_number": move_number,
            "side": side,
            "move": move,
            "fen": fen,
            "pawn": top_move_pawn,
            "centipawn": top_move_centipawn,
            "mate_in": top_move_mate_in,
            "evaluation": evaluation
        })
    return game_details

In [39]:
game = games[0]
game_details = analyse_pgn(game)
game_details_data_frame = pandas.DataFrame(game_details)
game_details_data_frame

get_top_move top_move_centipawn 37
1 white e4 0.37 37 0
get_top_move top_move_centipawn 28
1 black e5 0.28 28 0
get_top_move top_move_centipawn 19
2 white Nf3 0.19 19 0
get_top_move top_move_centipawn 39
2 black Nc6 0.39 39 0
get_top_move top_move_centipawn 28
3 white d4 0.28 28 0
get_top_move top_move_centipawn 42
3 black exd4 0.42 42 0
get_top_move top_move_centipawn 6
4 white Nxd4 0.06 6 0
get_top_move top_move_centipawn 64
4 black Nxd4 0.64 64 0
get_top_move top_move_centipawn 76
5 white Qxd4 0.76 76 0
get_top_move top_move_centipawn 69
5 black Qf6 0.69 69 0
get_top_move top_move_centipawn 74
6 white e5 0.74 74 0
get_top_move top_move_centipawn 57
6 black Qb6 0.57 57 0
get_top_move top_move_centipawn 44
7 white Qxb6 0.44 44 0
get_top_move top_move_centipawn 54
7 black axb6 0.54 54 0
get_top_move top_move_centipawn 20
8 white Bc4 0.2 20 0
get_top_move top_move_centipawn 55
8 black d6 0.55 55 0
get_top_move top_move_centipawn 66
9 white Bf4 0.66 66 0
get_top_move top_move_centipawn 4

Unnamed: 0,move_number,side,move,fen,pawn,centipawn,mate_in,evaluation
0,0,,,,0.00,0,0,
1,1,white,e4,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,0.37,37,0,
2,1,black,e5,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,0.28,28,0,
3,2,white,Nf3,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...,0.19,19,0,
4,2,black,Nc6,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...,0.39,39,0,
...,...,...,...,...,...,...,...,...
129,65,white,Qc4+,8/8/8/4p3/2Q2k2/8/4K3/8 b - - 11 65,35.17,3517,0,??
130,65,black,e4,8/8/8/8/2Q1pk2/8/4K3/8 w - - 0 66,96.05,9605,0,??
131,66,white,Qd4,8/8/8/8/3Qpk2/8/4K3/8 b - - 1 66,34.57,3457,0,
132,66,black,Kf5,8/8/8/5k2/3Qp3/8/4K3/8 w - - 2 67,0.00,0,0,
