In [2]:
import chess
import chess.engine
import chess.pgn
from datetime import datetime
import pandas as pd
import math
from tqdm import tqdm
from typing import Union, List
import zipfile
import os
import numpy as np
import json

In [15]:
def evaluate_position(board: chess.Board, engine: chess.engine.SimpleEngine, limit: chess.engine.Limit):
    """Evaluate ``board`` with ``engine`` and return the score from White's side.

    Args:
        board: Position to analyse.
        engine: Engine that performs the analysis.
        limit: Search limit handed to ``engine.analyse``.

    Returns:
        The numeric score seen from White's point of view. Forced mates are
        converted to a number by python-chess using ``mate_score=1000``.
    """
    white_pov_score = engine.analyse(board, limit)['score'].white()
    return white_pov_score.score(mate_score=1000)

def parse_elo_rating(rating_str: str) -> Union[int, None]:
    """Convert an Elo rating header value to an integer.

    Args:
        rating_str: Raw header text such as ``'2555'``. Empty strings,
            placeholders like ``'?'`` and ``None`` are treated as "unknown".

    Returns:
        The rating as ``int``, or ``None`` when the value cannot be interpreted
        as an integer.
    """
    try:
        return int(rating_str)
    except (TypeError, ValueError):
        # ValueError: non-numeric text; TypeError: None or another non-string value.
        return None

def parse_date(date_str: str) -> Union[datetime, None]:
    """Parse a PGN ``Date`` header value into a ``datetime``.

    Unknown trailing fields written as ``??`` are accepted; the missing month
    and/or day default to 1, e.g. ``'1997.??.??'`` -> 1997-01-01 and
    ``'1997.03.??'`` -> 1997-03-01.

    Args:
        date_str: Date text in ``YYYY.MM.DD`` form, optionally with ``??`` for
            an unknown day or an unknown month and day.

    Returns:
        The parsed ``datetime``, or ``None`` when the year is unknown or the
        text does not match any supported layout.
    """
    # Ordered from most to least specific; the '?' characters are matched literally.
    supported_formats = ('%Y.%m.%d', '%Y.%m.??', '%Y.??.??')
    for date_format in supported_formats:
        try:
            return datetime.strptime(date_str, date_format)
        except (TypeError, ValueError):
            # ValueError: layout mismatch; TypeError: non-string input such as None.
            continue
    return None

def read_games(pgn_path: str) -> List[chess.pgn.Game]:
    """Load every game stored in a PGN file.

    Args:
        pgn_path: Path of the PGN file to read.

    Returns:
        All games in file order; an empty list when the file holds no games.
    """
    def _iter_games(handle):
        # chess.pgn.read_game yields one game per call and None once the input is exhausted.
        current = chess.pgn.read_game(handle)
        while current is not None:
            yield current
            current = chess.pgn.read_game(handle)

    with open(pgn_path) as pgn_file:
        return list(_iter_games(pgn_file))

def _to_jsonable(value):
    """Convert numpy containers/scalars to plain Python objects for ``json.dump``."""
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(f'Object of type {type(value).__name__} is not JSON serializable')


def save_output(path: str, output: List[dict]):
    """Write ``output`` to ``path`` as JSON.

    Numpy arrays and numpy scalar values inside the records are converted to
    lists / plain Python numbers so that records holding numpy centipawn-loss
    arrays can be serialised.

    Args:
        path: Destination file; it is created or overwritten.
        output: Records to serialise.

    Raises:
        TypeError: If a record contains an object that is neither natively JSON
            serialisable nor a numpy array/scalar.
    """
    with open(path, 'w') as fout:
        # `default` is only consulted for objects json cannot encode on its own.
        json.dump(output, fout, default=_to_jsonable)

In [18]:
# Engine configuration.
# The Stockfish binary location can be overridden with the STOCKFISH_PATH
# environment variable; the default is the original Homebrew install path.
stockfish_path = os.environ.get('STOCKFISH_PATH', '/usr/local/Cellar/stockfish/15/bin/stockfish')

# Re-running this cell would otherwise leave the previously started engine
# process running, so shut it down before starting a new one.
if 'engine' in globals():
    try:
        engine.quit()
    except Exception:
        pass  # best effort: the old engine may already be closed or have crashed

engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)

# Search limits passed to every analysis call: a time cap in seconds and a
# maximum search depth in plies.
# NOTE(review): with depth=5 the 999 s time cap presumably never triggers — confirm it is intended.
movetimesec = 999
depth = 5
limit = chess.engine.Limit(time=movetimesec, depth=depth)

In [27]:
# Batch-process the PGN files in `data_path`: for the first
# `max_games_per_player` games of each file, evaluate every position of the
# main line with the engine and store the per-move evaluation losses of both
# sides in `<output_path>/<player_name>.npy`.
data_path = 'data'
output_path = 'processed_data'
max_games_per_player = 10  # only the first N games of each file are analysed

# np.save does not create missing directories.
os.makedirs(output_path, exist_ok=True)

for player_png_file in sorted(os.listdir(data_path)):
    pgn_path = os.path.join(data_path, player_png_file)
    # Skip sub-directories and hidden entries (e.g. .DS_Store); they are not
    # player files and would produce an empty player name.
    if player_png_file.startswith('.') or not os.path.isfile(pgn_path):
        continue

    output = []
    player_name = player_png_file.split('.')[0].lower()
    try:
        games = read_games(pgn_path)
    except Exception as exc:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still stops the run.
        print(f'Couldnt read games of player: {player_name}. ({type(exc).__name__}: {exc})')
        continue

    for i, game in enumerate(games[:max_games_per_player], start=1):
        try:
            print(f'[{player_name}] Game: {i}/{len(games)}')
            event = game.headers['Event']
            date_str = game.headers['Date']
            # The raw header text is stored; it is not converted to a datetime.
            date = date_str

            white_player = game.headers['White']
            black_player = game.headers['Black']
            # Elo tags may be absent; a missing tag is treated like an unparsable one (None).
            white_elo = parse_elo_rating(game.headers.get('WhiteElo', ''))
            black_elo = parse_elo_rating(game.headers.get('BlackElo', ''))
            result = game.headers['Result']
            white_won = result == '1-0'
            draw = result == '1/2-1/2'
            black_won = result == '0-1'

            # A game is kept as long as at least one rating is known.
            if white_elo is None and black_elo is None:
                print('Missing ELO for both players.')
                continue

            # Evaluate the starting position and the position after every main-line move.
            board = game.board()
            init_evaluation = evaluate_position(board, engine, limit)
            evaluations = [init_evaluation]
            moves = game.mainline_moves()
            for move in moves:
                board.push(move)
                position_evaluation = evaluate_position(board, engine, limit)
                evaluations.append(position_evaluation)

            # Evaluations are from White's point of view, so a drop after a
            # White move (even diff indices) is White's loss and a rise after a
            # Black move (odd diff indices) is Black's loss.
            evaluations = np.array(evaluations)
            evaluation_changes = np.diff(evaluations)
            white_centipawn_losses = -evaluation_changes[::2]
            black_centipawn_losses = evaluation_changes[1::2]
            white_avg_cp_loss = np.mean(white_centipawn_losses)
            black_avg_cp_loss = np.mean(black_centipawn_losses)

            game_output = {
                'event': event,
                'date': date,
                'white_player': white_player,
                'black_player': black_player,
                'white_elo': white_elo,
                'black_elo': black_elo,
                'result': result,
                'white_cp_losses': white_centipawn_losses,
                'black_cp_losses': black_centipawn_losses,
            }
            output.append(game_output)
        except Exception as exc:
            # Best effort per game: report the reason and move on to the next game.
            print(f'Couldnt process game: {i} of player: {player_name}. ({type(exc).__name__}: {exc})')

    # The records are dicts, so numpy stores them as pickled objects; reading
    # the file back requires np.load(..., allow_pickle=True).
    np.save(os.path.join(output_path, f'{player_name}.npy'), output)



[bruzon] Game: 1/690
[bruzon] Game: 2/690
[bruzon] Game: 3/690
[bruzon] Game: 4/690
[bruzon] Game: 5/690
[bruzon] Game: 6/690
[bruzon] Game: 7/690
[bruzon] Game: 8/690
[bruzon] Game: 9/690
[azmaiparashvili] Game: 1/1336
Missing ELO for both players.
[azmaiparashvili] Game: 2/1336
[azmaiparashvili] Game: 3/1336
Missing ELO for both players.
[azmaiparashvili] Game: 4/1336
Missing ELO for both players.
[azmaiparashvili] Game: 5/1336
Missing ELO for both players.
[azmaiparashvili] Game: 6/1336
Missing ELO for both players.
[azmaiparashvili] Game: 7/1336
[azmaiparashvili] Game: 8/1336
Missing ELO for both players.
[azmaiparashvili] Game: 9/1336
Missing ELO for both players.
[defirmian] Game: 1/1706
[defirmian] Game: 2/1706
[defirmian] Game: 3/1706
[defirmian] Game: 4/1706
[defirmian] Game: 5/1706
[defirmian] Game: 6/1706
[defirmian] Game: 7/1706
[defirmian] Game: 8/1706
[defirmian] Game: 9/1706
[caruana] Game: 1/3430
[caruana] Game: 2/3430
[caruana] Game: 3/3430
[caruana] Game: 4/3430
[caru

In [26]:
# Sanity check: reload one player's saved records and display the first game.
# allow_pickle=True is required because the file stores Python dicts; only load
# .npy files you produced yourself, since unpickling can execute arbitrary code.
np.load(file='processed_data/bruzon.npy', allow_pickle=True)[0]

{'event': 'CUB-ch',
 'date': '1997.03.20',
 'white_player': 'Bruzon, Lazaro',
 'black_player': 'Rodriguez, Amador',
 'white_elo': None,
 'black_elo': 2555,
 'result': '0-1',
 'white_cp_losses': array([  1,   7,   0,  -1,  45,  -7,  22, 172,  22,   0, 232,   0, 196,
         39, -12, 219,   0, 123,  25,  31,  17,   0,   9,   7,  -4,  41,
         38,  98,  15,   0, 124,   0,  59]),
 'black_cp_losses': array([  9,   5,  14,   0,   0,   0, 151,   0,   0,   0,   9,  35, 148,
        -20, 151,   0,  81,  89,   6,  19, -13,   0,  18,   5, -22,   0,
         60,   3, -18,  15,  13,  59,  -8])}