In [None]:
import pandas as pd
import numpy as np
import glob, os
import anal_games, functions_anal
from sklearn.model_selection import train_test_split
import winning_chances

# Calculate Winning Chance for each position

Build the list of games and split it into training and test sets.

In [None]:
# Destination of the cleaned game list produced below.
outfile = '../Cleaned_Analyzed_Games/all_games_cleaned.csv'

# Build the list of games (cleaned, with move counts) from the combined
# analysed-games file; skip_if_processed=True is passed so an existing result
# can be reused by process_all_files.
anal_games.process_all_files(
    outfile=outfile,
    filenames=["../huge_analyzed_games/combined_analyzed_games.csv"],
    functions=[
        functions_anal.MovesTotal,
        functions_anal.Cleanup,
        functions_anal.MovesBlack,
        functions_anal.MovesWhite,
    ],
    skip_if_processed=True,
    game_wise=True,
)

df = pd.read_csv(outfile)

# 80/20 train/test split with a fixed seed for reproducibility.
# Stratifying on number of moves or Elo does not work: it needs at least two
# games for each unique value (or combination of values), and binning does not help.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)

# Persist both partitions next to the cleaned game list.
for split_frame, split_path in (
    (df_train, '../Cleaned_Analyzed_Games/all_games_cleaned_train.csv'),
    (df_test, '../Cleaned_Analyzed_Games/all_games_cleaned_test.csv'),
):
    split_frame.to_csv(split_path, index=False)

For each move, bin the evaluation and tally the outcome of the game in that bin. This gives the winning, drawing and losing chances for a given evaluation.

In [None]:
# Output location of the winning-chance table (prefix/suffix are reused by later cells).
file_prefix = '../Cleaned_Analyzed_Games/winning_chances_'
file_suffix = '.csv'

# Evaluation bin edges: start at -20.05, step 0.1, stopping before 20.15.
bins = np.arange(-20.05, 20.15, 0.1)

# Reload the training game list saved by the train/test split cell.
df = pd.read_csv('../Cleaned_Analyzed_Games/all_games_cleaned_train.csv')

# Per-bin outcome counts: one row per outcome index returned by
# winning_chances.get_outcome_num (3 rows: win, draw and loss), one column per
# evaluation slot; +2 to account for values out of bounds.
# NOTE(review): np.digitize only returns indices 0..len(bins), so the last
# column is never incremented here. The length is kept because the bin labels
# built below rely on it - confirm against winning_chances.smooth_lines.
n_slots = len(bins) + 2
winchance_array = np.zeros((3, n_slots))
count_games = np.zeros(n_slots)

for file in df_train['File'].unique():  # loop over files
    print(file)
    file_data = pd.read_csv(file)
    # Training games stored in this file; rows with any missing value are dropped.
    df_file = df_train[df_train['File'] == file].dropna(how='any')
    for _, data_line in df_file.iterrows():  # loop over training games from that file
        ind = data_line['LineStart']  # starting line for the game
        # Sanity checks: the game list and the file must agree on the game at that line.
        assert file_data.loc[ind, "GameID"] == data_line['GameID']
        assert not np.isnan(file_data.loc[ind, "WhiteElo"])
        # Reads a game, rejects it if invalid, outputs a game dictionary.
        ind, game = anal_games.read_game(file_data, ind, functions=[], game_wise=False)

        # Sanity check, should be taken care of earlier.
        if game is None:
            continue
        outcome = winning_chances.get_outcome_num(game)  # looked up once per game
        if outcome is None:
            continue

        for j in range(len(game['Move'])):  # bin output and move evaluation
            i_bin = np.digitize(game['Evaluation'][j], bins=bins)
            winchance_array[outcome, i_bin] += 1
            count_games[i_bin] += 1

# Normalise counts to percentages per evaluation slot. Slots without any move
# give 0/0 = NaN; the warning is silenced and the NaN is left for smoothing.
with np.errstate(divide='ignore', invalid='ignore'):
    winchance_array = winchance_array / count_games * 100

# Smooth win chance array to give values to evaluations where we don't have games.
out_line = winning_chances.smooth_lines(winchance_array, count_games, bins)

# Transform output into a DataFrame and save it. The labels are the bin edges
# plus one extra label at each end for out-of-range evaluations.
bins_new = bins.tolist()
bins_new.insert(0, '-20-')
bins_new.append('20+')
data = pd.DataFrame(
    {
        'bins': bins_new,
        'WinningChance': out_line[0, :],
        'DrawChance': out_line[1, :],
        'LosingChance': out_line[2, :],
        'TotalGames': count_games,
    },
    columns=['bins', 'WinningChance', 'DrawChance', 'LosingChance', 'TotalGames'],
)
data.to_csv(file_prefix + 'all_moves' + file_suffix, index=False)

Calculate the Winning Chance loss at every move by taking the difference between the winning chance before and after the move.

In [None]:
# Load the combined analysed-games file (the same file the game list was built from).
all_games_file = "../huge_analyzed_games/combined_analyzed_games.csv"
all_games = pd.read_csv(all_games_file)

# Process the games against the winning-chance table saved by the previous cell,
# using the same evaluation bin edges.
# NOTE(review): the return value is discarded - presumably process_chess_data
# writes its own output or updates all_games in place; confirm.
winning_chances.process_chess_data(
    all_games,
    winning_chance_table=f"{file_prefix}all_moves{file_suffix}",
    intervals=bins,
)

Bin the Winning Chance Loss into mistake bins.

In [None]:
# Bin edges for the winning chance loss, passed to create_summary_table.
mistake_bins = [5, 10, 15, 20, 25, 30, 35, 40, 50, 60, 70, 100]

# NOTE(review): `df` is the training game list loaded earlier in this notebook,
# not the `all_games` frame handed to process_chess_data - confirm this is the
# intended input for the summary.
summary_table = winning_chances.create_summary_table(
    df,
    mistake_bins=mistake_bins,
)
summary_table.to_csv("../huge_analyzed_games/big_summary_table.csv")