# Analysis of Real-World Chess Games with a Chess Engine

In [None]:
import csv
import io
import os
import time
from pathlib import Path

import chess
import chess.engine
import chess.pgn
import pandas as pd

In [None]:
# --- Engine binaries (absolute, machine-specific locations) ---
stockfish_Path = Path(
    r"C:\Users\cianw\Chess Engines\Latest\stockfish-windows-x86-64-avx2\stockfish\stockfish-windows-x86-64-avx2.exe"
)
lc0_Path = Path(
    r"C:\Users\cianw\Chess Engines\Latest\lc0-v0.30.0-windows-gpu-nvidia-cuda\lc0.exe"
)

# --- Data locations and base names ---
outputName = "allRatings"
pgnFolder = r"E:\ChessData"
csvFolder = r"E:\ChessData\explorationOutputs"
pgnName = "allRatings"

# The input TSV and both output TSVs live in csvFolder and share the pgnName
# stem; only the suffix differs.
_tsvStem = rf"{csvFolder}\{pgnName}"
pgnIn = Path(_tsvStem + ".tsv")
pgnOut = Path(_tsvStem + "_output_20230918.tsv")            # final result, written once
pgnOut_iter = Path(_tsvStem + "_output_20230918_iter.tsv")  # appended to chunk by chunk

In [None]:
# Name of the lichessData column that identifies the opening; the sampling
# cell uses it as one of the stratification columns.
openingVariable = 'Opening'

# Load the tab-separated games table.
lichessData = pd.read_csv(pgnIn, sep="\t")

# Merge the separate date and time text columns into a single timestamp column.
lichessData['UTC_dateTime'] = pd.to_datetime(lichessData['UTCDate'] + ' ' + lichessData['UTCTime'])

# Kept as the LAST expression of the cell: a notebook only renders the final
# expression, so a describe() call in the middle of the cell is computed and
# then silently discarded.
lichessData.describe()

In [None]:
# UCI options sent to Stockfish; evaluateGame receives the same mapping and
# re-applies it before analysing each game.
stockfish_options = {'Clear Hash': True, 'Threads': 4}

# Start the Stockfish binary as a UCI subprocess and apply the options once up front.
stockfish_engine = chess.engine.SimpleEngine.popen_uci(stockfish_Path)
stockfish_engine.configure(stockfish_options)

In [None]:
def evaluateGame(games, loadedEngine, engineOptions):
    """Evaluate sampled positions of a single game with a UCI engine.

    The game in ``games['Moves']`` is replayed move by move. From half-move
    ``games['halfMoveCount']`` onwards the position is analysed for one second
    at every tenth half-move, and the walk stops 50 half-moves past that
    starting point, so at most five positions are analysed per game.

    Side effects: increments the module-level ``linesProcessed`` counter and,
    every 1000 games, prints the counter and the seconds elapsed since the
    module-level ``start_time``.

    Args:
        games: One row of the games table (mapping or Series) providing
            ``'Moves'`` (PGN move text) and ``'halfMoveCount'``.
            NOTE(review): ``halfMoveCount`` presumably marks where the opening
            ends, in half-moves -- confirm against the data preparation step.
        loadedEngine: Running engine object exposing ``configure`` and
            ``analyse`` (e.g. ``chess.engine.SimpleEngine``).
        engineOptions: Option mapping re-applied to the engine before the game
            is analysed.

    Returns:
        A tuple ``(evals, depths, seldepths)`` of three parallel lists with one
        entry per analysed position. ``evals`` holds the score from White's
        point of view as returned by ``.score()`` (per the python-chess docs
        this is ``None`` for mate scores); ``seldepths`` holds ``None`` where
        the engine reported no (or a zero) selective depth. All three lists
        are empty when the move text is missing or cannot be read as a game.
    """
    global linesProcessed

    evalList1 = []
    depthList1 = []
    seldepthList1 = []

    # A missing value arrives from pandas as NaN (a float), which StringIO
    # rejects, and read_game() returns None for empty input. Treat both as
    # "nothing to analyse" instead of raising, so one bad row does not make
    # the caller discard the whole chunk.
    movesText = games['Moves']
    gameMoves = None
    if isinstance(movesText, str):
        gameMoves = chess.pgn.read_game(io.StringIO(movesText))

    if gameMoves is not None:
        board = gameMoves.board()
        loadedEngine.configure(engineOptions)
        firstAnalysedMove = games['halfMoveCount']

        for moveCount, move in enumerate(gameMoves.mainline_moves(), start=1):
            board.push(move)
            offset = moveCount - firstAnalysedMove
            if offset < 0:
                # Still before the first position of interest: replay only.
                continue
            if offset == 50:
                # Sampling window exhausted (offsets 0, 10, 20, 30, 40 done).
                break
            if offset % 10 == 0:
                info1 = loadedEngine.analyse(board, limit=chess.engine.Limit(time=1), info=chess.engine.INFO_ALL)
                evalList1.append(info1['score'].white().score())
                depthList1.append(info1['depth'])
                # A missing or zero seldepth is recorded as None.
                seldepthList1.append(info1.get("seldepth") or None)

    # Progress reporting: count every row, including skipped ones.
    linesProcessed += 1
    if linesProcessed % 1000 == 0:
        print(linesProcessed)
        print((time.time() - start_time))
    return evalList1, depthList1, seldepthList1

In [None]:
def process_data(chunk):
    """Append Stockfish evaluation columns to a chunk of games.

    Every row of ``chunk`` is passed to ``evaluateGame`` together with the
    module-level ``stockfish_engine`` and ``stockfish_options``; the three
    values it returns per row are expanded into the columns ``SF_eval``,
    ``SF_depth`` and ``SF_seldepth``.

    Args:
        chunk: DataFrame slice of games to analyse.

    Returns:
        ``chunk`` with the three engine columns concatenated on the right.
    """
    engine_columns = ['SF_eval', 'SF_depth', 'SF_seldepth']

    # Row-wise evaluation; result_type='expand' spreads each returned
    # 3-tuple across three columns.
    per_game = chunk.apply(
        lambda row: evaluateGame(row, loadedEngine=stockfish_engine, engineOptions=stockfish_options),
        axis=1,
        result_type='expand',
    )

    chunk_out = pd.DataFrame()
    chunk_out[engine_columns] = per_game
    return pd.concat([chunk, chunk_out], axis=1)

In [None]:
def csvCommit(outFile, dictToWrite, csvHeaders):
    """Append rows to a CSV file, writing the header row when the file is new.

    Args:
        outFile: Path of the CSV file to append to; created if it is missing.
        dictToWrite: Iterable of dicts, one per row, keyed by ``csvHeaders``.
        csvHeaders: Field names, in output column order.
    """
    # A header is needed when the file is absent OR present but empty (for
    # example left behind by an earlier run that failed before writing
    # anything). Checking existence alone would leave such a file headerless.
    needs_header = not os.path.isfile(outFile) or os.path.getsize(outFile) == 0
    with open(outFile, mode='a', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=csvHeaders)
        if needs_header:
            writer.writeheader()
        writer.writerows(dictToWrite)

In [None]:
from sklearn.model_selection import train_test_split

# Keep the text before the first '-' of the Result column
# (presumably values such as "1-0" -- confirm against the data).
lichessData['winLossDraw'] = lichessData['Result'].str.split('-').str[0]
df = lichessData

# Reproducible 1% sample, stratified jointly on the opening column and 'whiteWin'.
# NOTE(review): 'whiteWin' is not created in this cell -- presumably it is
# already a column of the input TSV; confirm.
sample_df, _ = train_test_split(
    df,
    train_size=0.01,
    random_state=123,
    stratify=df[[openingVariable, 'whiteWin']],
)

# Number of games handed to process_data per iteration; adjust to the
# available memory.
chunk_size = 500

# Progress bookkeeping; evaluateGame accesses these through `global`.
linesProcessed = 0
dataFrameSize = len(df)
printThreshold = dataFrameSize / 1000
start_time = time.time()

# The analysis runs on the sample; results accumulate in processed_df.
analysis_df = sample_df
processed_df = pd.DataFrame()

In [None]:
# Analyse the sample chunk by chunk. Each successful chunk is appended to the
# in-memory result and to the incremental TSV, so partial progress survives an
# interrupted run.
for start_idx in range(0, len(analysis_df), chunk_size):
    startTime = time.time()
    end_idx = start_idx + chunk_size
    chunk = analysis_df.iloc[start_idx:end_idx]

    try:
        # Process the chunk and add new columns
        processed_chunk = process_data(chunk)
        processed_df = pd.concat([processed_df, processed_chunk], ignore_index=True)
    except Exception as e:
        print(f"Error occurred: {e}")
        # Skip the write step: processed_chunk is either undefined (first
        # chunk) or still holds the PREVIOUS chunk, which would otherwise be
        # appended to the incremental file a second time.
        print(f"{start_idx} Failed")
        continue

    # Append mode creates the file when it is missing; the header is written
    # only in that case.
    processed_chunk.to_csv(pgnOut_iter, sep="\t", mode='a', header=not os.path.exists(pgnOut_iter))

    print(time.time()-startTime)
    print(f"{start_idx} Completed")

In [None]:
# Save the final processed DataFrame to a file
processed_df.to_csv(pgnOut, sep="\t")

# Shut down the Stockfish subprocess now that the analysis is finished, so it
# is not left running. Re-run the engine start-up cell before analysing again.
stockfish_engine.quit()