In [18]:
import pandas as pd
import tqdm
from stockfish import Stockfish
import random
import chess
import chess.engine
import pandas as pd
import tqdm

# Start the Stockfish wrapper used by analyze_positions_from_csv.
# NOTE(review): this is a machine-specific absolute path; point it at the
# local Stockfish binary before running on another computer.
STOCKFISH_BINARY = "C:/Users/aober/Documents/Data_Science_Studium/4Semester/BigData/stockfish/stockfish-windows-x86-64-avx2.exe"

stockfish_good = Stockfish(STOCKFISH_BINARY)
# Search depth and skill level are both fixed at 10 for this engine instance.
stockfish_good.set_depth(10)
stockfish_good.set_skill_level(10)

In [22]:

def get_fen_from_move_sequence(move_sequence):
    """
    Replay a sequence of SAN moves from the initial position and return the FEN.

    :param move_sequence: Whitespace-separated moves in standard algebraic
        notation, e.g. "e4 e5 Nf3". An empty or whitespace-only string is
        treated as "no moves" and yields the FEN of the starting position.
    :return: FEN string of the position reached after the last move.
    :raises ValueError: propagated from ``board.parse_san`` when a token is
        not a valid/legal move in the current position (python-chess raises
        ValueError subclasses here).
    """
    board = chess.Board()
    # split() with no argument collapses runs of whitespace and drops leading
    # and trailing blanks, so an empty token never reaches parse_san().
    for move_san in move_sequence.split():
        board.push(board.parse_san(move_san))
    return board.fen()


def analyze_positions_from_csv(file_path, amount):
    """
    Evaluate every sufficiently frequent position listed in a CSV file.

    The CSV is expected to provide a 'position' column (a move sequence that
    get_fen_from_move_sequence can replay) and a 'count' column. Rows whose
    count is below ``amount`` are ignored. Each remaining move sequence is
    converted to a FEN, loaded into the module-level ``stockfish_good`` engine
    and evaluated. A FEN that has already been evaluated is not evaluated again.

    :param file_path: Path to the CSV file containing the positions and counts.
    :param amount: The minimum count a position must have to be evaluated.
    :return: A dictionary mapping each FEN to the 'value' entry of the
             engine's evaluation. The evaluation's other entries are discarded.
    """
    games_df = pd.read_csv(file_path)
    positions_dict = {}

    # Keep only rows where 'count' is at least the specified 'amount'.
    filtered_games_df = games_df[games_df['count'] >= amount]

    for position in tqdm.tqdm(filtered_games_df['position'], total=filtered_games_df.shape[0]):
        fen = get_fen_from_move_sequence(position)

        # Different move orders can reach the same FEN; evaluate each one once.
        if fen in positions_dict:
            continue

        # The engine must be given the position before it is asked for an
        # evaluation; otherwise it scores whatever position it currently holds.
        stockfish_good.set_fen_position(fen)
        evaluation = stockfish_good.get_evaluation()
        positions_dict[fen] = evaluation['value']

    return positions_dict

def get_evaluation(stockfish, fen):
    """
    Ask a running UCI engine for a numeric score of the given position.

    :param stockfish: Engine object exposing ``analyse(board, limit)``.
    :param fen: FEN string of the position to analyse.
    :return: The result of ``score(mate_score=10000)`` on the White-side
             score reported by the engine.
    """
    board = chess.Board(fen)
    # Each position gets a fixed 0.1 time budget.
    think_time = chess.engine.Limit(time=0.1)
    analysis = stockfish.analyse(board, think_time)
    white_score = analysis['score'].white()
    return white_score.score(mate_score=10000)

def analyze_positions(file_path, amount, stockfish_path, existing_positions=None):
    """
    Evaluate the frequent positions of a CSV file with a freshly started engine.

    The CSV is read with pandas and must contain a 'position' column (a move
    sequence that get_fen_from_move_sequence can replay) and a 'count' column.
    Only rows with ``count >= amount`` are considered. A position is skipped,
    and counted as a duplicate, when its FEN is already present in
    ``existing_positions`` or was already evaluated earlier in this call.

    :param file_path: Path (or file-like object) of the CSV to read.
    :param amount: Minimum 'count' a row needs to be analysed.
    :param stockfish_path: Path to the UCI engine executable to launch.
    :param existing_positions: Optional container of FENs that must not be
        analysed again; ``None`` means "nothing known yet".
    :return: Dictionary mapping each newly analysed FEN to the value returned
        by get_evaluation.
    """
    existing_positions = existing_positions or {}

    # Load and filter before launching the engine, so a missing or malformed
    # CSV fails without spawning an engine process.
    games_df = pd.read_csv(file_path)
    filtered_df = games_df[games_df['count'] >= amount]

    analysis_dict = {}  # FEN -> evaluation for positions analysed in this call
    duplicate_counter = 0  # positions skipped because their FEN was already known

    # The context manager shuts the engine process down when the block exits.
    with chess.engine.SimpleEngine.popen_uci(stockfish_path) as stockfish:
        for position_string in tqdm.tqdm(filtered_df['position'], total=filtered_df.shape[0]):
            fen = get_fen_from_move_sequence(position_string)

            # Skip FENs supplied by the caller as well as FENs already
            # analysed in this run; re-analysing them only costs engine time.
            if fen in existing_positions or fen in analysis_dict:
                duplicate_counter += 1
                continue

            analysis_dict[fen] = get_evaluation(stockfish, fen)

    print(f"Duplicate positions skipped: {duplicate_counter}")
    return analysis_dict

In [23]:
# CSV whose 'position' and 'count' columns are read by analyze_positions.
file_path = '../../../testData/splitted_games/final_processed_index.csv'
# NOTE(review): machine-specific absolute path; adjust to the local Stockfish binary.
stockfish_path = "C:/Users/aober/Documents/Data_Science_Studium/4Semester/BigData/stockfish/stockfish-windows-x86-64-avx2.exe"
# Only positions with at least this 'count' are analysed.
minimum_count = 1000

# FENs that are already known; they are handed to analyze_positions so that
# they are skipped instead of being analysed again.
existing_positions_dict = {}
resulting_dict = analyze_positions(
    file_path,
    minimum_count,
    stockfish_path,
    existing_positions=existing_positions_dict,
)

100%|██████████| 307/307 [00:32<00:00,  9.50it/s]

Duplicate positions skipped: 0





In [26]:
# Display the FEN -> evaluation mapping returned by analyze_positions.
resulting_dict

{'rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR b KQkq - 0 1': 31,
 'rnbqkb1r/pppppppp/5n2/8/3P4/8/PPP1PPPP/RNBQKBNR w KQkq - 1 2': 32,
 'rnbqkb1r/pppppppp/5n2/8/3P4/5N2/PPP1PPPP/RNBQKB1R b KQkq - 2 2': 34,
 'rnbqkb1r/pppp1ppp/4pn2/8/3P4/5N2/PPP1PPPP/RNBQKB1R w KQkq - 0 3': 32,
 'rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1': 35,
 'rnbqkbnr/pp1ppppp/8/2p5/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2': 31,
 'rnbqkbnr/pp1ppppp/8/2p5/4PP2/8/PPPP2PP/RNBQKBNR b KQkq - 0 2': -13,
 'rnbqkbnr/pppppppp/8/8/8/6P1/PPPPPP1P/RNBQKBNR b KQkq - 0 1': 13,
 'rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2': 26,
 'rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R b KQkq - 1 2': 33,
 'r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R w KQkq - 2 3': 36,
 'r1bqkbnr/pppp1ppp/2n5/4p3/3PP3/5N2/PPP2PPP/RNBQKB1R b KQkq - 0 3': 18,
 'r1bqkbnr/pppp1ppp/2n5/8/3pP3/5N2/PPP2PPP/RNBQKB1R w KQkq - 0 4': 18,
 'r1bqkbnr/pppp1ppp/2n5/8/3NP3/8/PPP2PPP/RNBQKB1R b KQkq - 0 4': 13,
 'rnbqkbnr/ppp1pppp/8/