In [None]:
# Analyze games and save evaluation for each move

In [None]:
import chess #https://github.com/niklasf/python-chess
from stockfish import Stockfish

import os

# Engine used to evaluate every position in this notebook. It is deliberately
# not run at full strength (search depth 15, skill level 15) so that analysing
# many games stays feasible.
# The binary location can be overridden with the STOCKFISH_PATH environment
# variable; the default is the original author's local install.
STOCKFISH_PATH = os.environ.get(
    "STOCKFISH_PATH",
    "C:/Users/aober/Documents/Data_Science_Studium/4Semester/BigData/stockfish/stockfish-windows-x86-64-avx2.exe",
)
stockfish_good = Stockfish(STOCKFISH_PATH)
stockfish_good.set_depth(15)
stockfish_good.set_skill_level(15)
import chess.pgn
import pandas as pd
pd.options.display.max_columns=999
import datetime
import tqdm
import zipfile
import numpy as np

In [None]:
# Base name (without the .csv extension) of the games export to analyse;
# it is looked up under ../../testData/ by the loading cell.
file_name = "apendra_games"

In [None]:
# Load the exported games. Later cells read the 'moves' and 'headers' columns,
# which are stored in the CSV as Python-literal strings.
games=pd.read_csv(f"../../testData/{file_name}.csv")

In [None]:
def safe_convert_to_int(value):
    """Convert ``value`` to ``int``, returning ``None`` when that is not possible.

    Args:
        value: Anything accepted by ``int()`` (str, float, int, ...), or a value
            that cannot be converted (e.g. ``None``, ``"abc"``, ``float("inf")``).

    Returns:
        The converted integer, or ``None`` if ``int(value)`` fails.
    """
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: malformed string / NaN; TypeError: None or other
        # unsupported types; OverflowError: +/- infinity.
        return None  # or you can use np.nan or a placeholder like -1

In [None]:
# Turn the string representations stored in the CSV back into Python objects.
# SECURITY NOTE(review): eval() executes arbitrary code found in the CSV. Only run
# this on files you produced yourself; consider ast.literal_eval if the stored
# values are plain literals (TODO confirm against the export format).
# NOTE: this cell is not idempotent - after one run the columns hold objects, and
# eval() on a non-string raises TypeError, so reload `games` before re-running.
games['moves']=games['moves'].apply(lambda x: eval(x))
games['headers']=games['headers'].apply(lambda x: eval(x))# string -> dict

# Running Stored Game
All the functions below are used for the analysis.
get_accuracy(): a simple approach to calculating the accuracy of an opening.
build_stored_game_analysis(): evaluates a game. The game is passed as a parameter; the function iterates over its moves and uses the Stockfish engine defined at the beginning of the document to evaluate the position. With this approach, using the strongest version of Stockfish will be very slow.

In [None]:
# Standard material values, keyed by the piece symbol that
# build_stored_game_analysis records for each captured piece (str(piece)).
# NOTE: python-chess follows FEN casing - uppercase symbols are White pieces and
# lowercase symbols are Black pieces. A captured Black piece therefore scores
# positive and a captured White piece scores negative.
# Kings have no entry.
piece_scores={
    "p": 1,
    "P": -1,
    "r": 5,
    "R": -5,
    "n": 3,
    "N": -3,
    "b": 3,
    "B": -3,
    "q": 9,
    "Q": -9
}

In [None]:
def get_accuracy(evaluation_change):
    """Map the size of an evaluation swing to a coarse accuracy score.

    The sign of ``evaluation_change`` is ignored; only its magnitude matters.
    Bands (inclusive upper bounds): <=25 -> 100, <=50 -> 75, <=75 -> 50,
    <=100 -> 25, anything larger -> 0.
    """
    magnitude = abs(evaluation_change)
    # (upper bound of the band, score awarded), checked from smallest to largest.
    bands = ((25, 100), (50, 75), (75, 50), (100, 25))
    for upper_bound, score in bands:
        if magnitude <= upper_bound:
            return score
    return 0


In [None]:
import random

#Used by chessnit.com
def build_stored_game_analysis(game, move_number, prev_evaluation):
    """Replay the first ``move_number`` moves of ``game`` and evaluate the position.

    Args:
        game: Mapping-like game record (e.g. one row of the ``games`` DataFrame)
            providing ``game['moves']`` (sequence of SAN strings) and
            ``game['headers']`` (dict; the URL is read from
            ``headers['_tag_roster']['Site']`` when present).
        move_number: How many entries of ``game['moves']`` to replay from the
            standard starting position.
        prev_evaluation: Engine value of the previous position; subtracted from
            the current value to obtain ``evaluation_change``.

    Returns:
        ``(row, value)`` where ``row`` is a dict with the keys ``move_number``,
        ``taken``, ``invalid``, ``evaluation``, ``evaluation_change``,
        ``accuracy``, ``taken_score``, ``fen``, ``url`` and (when at least one
        move was replayed) ``last_move``; ``value`` is the raw engine value for
        the reached position.
    """
    row = {'move_number': move_number, 'taken': []}
    board = chess.Board()
    last_san = None
    for san in game['moves'][:move_number]:
        move = board.parse_san(san)
        if board.is_en_passant(move):
            # The captured pawn is NOT on the destination square for en passant,
            # so piece_at(to_square) would miss it. It belongs to the side that
            # is not on move.
            captured = chess.Piece(chess.PAWN, not board.turn)
        else:
            captured = board.piece_at(move.to_square)
        if captured:
            row['taken'].append(str(captured))
        # Push the already-parsed move instead of parsing the SAN a second time.
        board.push(move)
        last_san = san

    fen = board.fen()
    # Positions containing promoted pieces or finished games are flagged.
    row['invalid'] = bool(board.promoted) or bool(board.outcome())
    stockfish_good.set_fen_position(fen)
    evaluation = stockfish_good.get_evaluation()
    # NOTE(review): the engine wrapper reports forced mates as a move count rather
    # than centipawns; the value is used unchanged here - confirm whether such
    # positions should be flagged instead.
    value = evaluation['value']
    row['evaluation'] = value
    row['evaluation_change'] = value - prev_evaluation
    row['accuracy'] = get_accuracy(row['evaluation_change'])

    # Material balance of all captures so far, scaled by 100 (centipawn-like units).
    row['taken_score'] = sum(piece_scores.get(p, 0) for p in row['taken']) * 100
    row['fen'] = fen
    row['url'] = game['headers'].get("_tag_roster", {}).get("Site", "")+f"#{move_number}"
    if last_san is None:
        # No move could be replayed (empty move list or move_number < 1):
        # report the game and flag the row, as the original fallback did.
        print(game)
        row['invalid'] = True
    else:
        row['last_move'] = last_san
    return row, value


In [None]:

import pandas as pd
import tqdm

def analyze_games(games, n, max_move_number):
    """Run the per-move engine analysis for the first ``n`` games.

    Args:
        games: DataFrame of games; rows are accessed positionally via ``iloc``
            and passed to ``build_stored_game_analysis``.
        n: Maximum number of games to analyse (capped at ``len(games)``).
        max_move_number: Analyse move numbers ``1..max_move_number`` of each game.

    Returns:
        A list with one DataFrame per analysed game, indexed by ``move_number``.
    """
    all_game_analysis = []

    # Iterate over the first n games in the DataFrame
    for i in tqdm.tqdm(range(min(n, len(games)))):
        game = games.iloc[i]
        game_analysis = []
        # Reset the baseline for every game: otherwise the first
        # evaluation_change of a game would be measured against the final
        # position of the previous game.
        prev_evaluation = 0

        # Analyze each game up to the specified move number
        # NOTE(review): games with fewer than max_move_number moves still get
        # max_move_number rows; the trailing rows repeat the final position.
        for move_number in range(1, max_move_number + 1):
            analysis_result, prev_evaluation = build_stored_game_analysis(
                game, move_number, prev_evaluation
            )
            game_analysis.append(analysis_result)

        # Store the analysis results for each game
        all_game_analysis.append(pd.DataFrame(game_analysis).set_index("move_number"))

    return all_game_analysis


In [None]:

# Define the number of games and the maximum move number you want to analyze
n_games = 1000  # analyze at most the first 1000 games (fewer if the file is smaller)
max_move_number = 10  # analyze move numbers 1..10 of every game

# 'games' is the DataFrame loaded and parsed in the cells above.
# This call runs the engine once per (game, move number) pair, so it can take a while.
games_analysis = analyze_games(games, n_games, max_move_number)

# Now, games_analysis is a list of DataFrames, each containing the analysis of one game


In [None]:
# Preview the first two game records.
games[0:2]

In [None]:
# Sanity check: analyze_games returns a list (one DataFrame per game).
type(games_analysis)

# Save as json

In [None]:
# Save the analysed slice of the 'games' DataFrame.
# NOTE: orient='records' with lines=True writes JSON Lines (one JSON object per
# line), even though the file is named .json.
games[0:n_games].to_json("../../testData/games.json", orient='records', lines=True)

import json

# Convert each per-game DataFrame to a list of row dicts.
# NOTE(review): orient='records' does not include the index, so the
# 'move_number' index set in analyze_games is not written; it is only implied
# by each row's position in its list - confirm this is what consumers expect.
games_analysis_dict = [df.to_dict(orient='records') for df in games_analysis]

# Save the nested list (games -> moves -> row dict) as a single JSON document.
with open("../../testData/games_analysis.json", "w") as file:
    json.dump(games_analysis_dict, file)
