# Get score and variance for each move in game
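Parse the PGN file (once each game has been converted to a single line) to get, for every move, the Stockfish score and the spread (square root of the weighted variance) of the scores of the neighboring positions in the graph, then split both lists into white and black.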

## Read in packages, graph, and functions

In [1]:
from collections import defaultdict
import sys
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
import chess
import subprocess
import timeit
import pickle
import stockfish
from stockfish import Stockfish
import chess
import chess.engine
import warnings
warnings.filterwarnings('ignore')

# Start a UCI engine process from the Stockfish binary through python-chess;
# get_score and get_sd_score_from_san call engine.analyse on this object.
engine = chess.engine.SimpleEngine.popen_uci("../stockfish-10-64")
# NOTE(review): this rebinds the name `stockfish`, hiding the `stockfish`
# module imported above. No later use of this object is visible in the notebook.
stockfish = Stockfish("../stockfish-10-64")

In [2]:
# Load the pickled networkx graph. The functions below look nodes up by FEN
# string and read the node attributes 'score', 'count' and 'movelistCount'.
g = nx.read_gpickle('../Directed_Graphs/690k_30.gpickle')

In [3]:
def varWeighted(scores, counts):
    """Return the count-weighted sample variance of ``scores``.

    The weighted mean is ``sum(score * count) / sum(counts)`` and the result is
    ``V1 / (V1**2 - V2) * sum(count * (score - mean)**2)`` with
    ``V1 = sum(counts)`` and ``V2 = sum(count**2)``.

    Args:
        scores: Sequence of numeric scores.
        counts: Sequence of numeric weights, paired positionally with ``scores``.

    Returns:
        The weighted variance, or ``None`` when it is undefined: fewer than two
        counts, a total weight of zero, or ``V1**2 == V2`` (for example when
        only one count is non-zero).
    """
    if len(counts) < 2:
        return None

    V1 = sum(counts)
    V2 = sum([count**2 for count in counts])
    denominator = V1**2 - V2
    # Both divisions below would otherwise raise ZeroDivisionError; report the
    # degenerate case the same way as "not enough data".
    if V1 == 0 or denominator == 0:
        return None

    weightedMean = sum([a*b for a, b in zip(scores, counts)]) / V1
    scoreDiffs = [(score - weightedMean)**2 for score in scores]
    return (V1 / denominator) * sum([a*b for a, b in zip(counts, scoreDiffs)])
    
def get_score(san):
    """Return the engine score of the position reached by playing ``san``.

    Moves are pushed onto a fresh board one by one; replay stops at the first
    token that cannot be played, and the position reached so far is analysed
    with ``engine.analyse`` under ``chess.engine.Limit(time=.05)``.

    Args:
        san: Iterable of SAN move strings, in game order.

    Returns:
        ``int(str(score))`` for an ordinary score. When the string form of the
        score contains ``'#'`` (python-chess's marker for a mate score) the
        fixed sentinel ``39765`` is returned, or ``-39765`` if the string also
        contains ``'-'``.
    """
    board = chess.Board()
    for move in san:
        try:
            board.push_san(move)
        except Exception:
            # Best-effort replay: stop at the first unplayable token, but let
            # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
            break

    info = engine.analyse(board, chess.engine.Limit(time=.05), info=chess.engine.INFO_SCORE)
    score_text = str(info['score'])
    if '#' in score_text:
        return -39765 if '-' in score_text else 39765
    return int(score_text)

def san_to_fen(san):
    """Return the FEN of the position reached by playing ``san`` from the start.

    Args:
        san: Iterable of SAN move strings, in game order.

    Returns:
        ``board.fen()`` after the last move that could be played. Replay stops
        at the first token that cannot be pushed, so an unplayable first token
        yields the FEN of the starting position.
    """
    board = chess.Board()
    for move in san:
        try:
            board.push_san(move)
        except Exception:
            # Best-effort replay: stop at the first unplayable token, but let
            # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
            break
    return board.fen()

def get_node_sd(node):
    """Return ``(node, sd)`` where ``sd`` is the weighted spread of neighbor scores.

    For every neighbor of ``node`` in the global graph ``g`` this collects the
    neighbor's ``'score'`` attribute and, as its weight, the entry of the
    neighbor's ``'movelistCount'`` attribute keyed by ``node``. ``sd`` is the
    square root of ``varWeighted(scores, counts)``.

    Args:
        node: Node key in ``g``.

    Returns:
        ``(node, sd)``, or ``(node, None)`` when the node or a required
        attribute is missing, or when the variance is undefined.
    """
    scores = []
    counts = []
    try:
        for neighbor in g.neighbors(node):
            # Read the attributes straight from the node's own data dict.
            # nx.get_node_attributes(g, ...) builds a dict over every node in
            # the graph, which was being repeated twice per neighbor.
            attrs = g.nodes[neighbor]
            scores.append(attrs['score'])
            counts.append(attrs['movelistCount'][node])
        return node, np.sqrt(varWeighted(scores, counts))
    except Exception:
        # Covers an unknown node, missing attributes, and np.sqrt(None) when
        # varWeighted reports an undefined variance.
        return node, None

## Read in pgn file

In [None]:
# Read the whole file into memory. The context manager closes the handle even
# if read() raises, which the explicit open()/close() pair did not.
with open("pgnparse.txt", "r") as my_file:
    pgn = my_file.read()

# One record per line; print the line count before and after dropping empties.
pgn_list = pgn.split("\n")
print(len(pgn_list))

pgn_list = list(filter(None, pgn_list))
print(len(pgn_list))

In [None]:
# Display the second non-empty line as a quick visual check of the record format.
pgn_list[1]

## Create Dataframe from pgn file by splitting into dictionary of headers and values

In [None]:
# Split every record into header key/value pairs plus its move text, and
# collect them column-wise in `d` (key -> list of values, one per record).
# NOTE(review): values are aligned purely by position, so records with differing
# header sets leave the lists in `d` with different lengths -- confirm the input
# is uniform.
start = timeit.default_timer()
d = defaultdict(list)
cnt = 0  # never updated or read in this notebook; kept so the name stays defined
for item in pgn_list:
    parts = item.split('[mainline')
    if len(parts) < 2:
        # No '[mainline' marker: report the record and skip it. This is the
        # only failure the old bare `except:` could actually see here.
        print('problem parsing:', item)
        continue

    # Splitting the header text on '"' alternates key text and quoted value.
    info = parts[0].split('"')
    for a, b in zip(info[::2], info[1::2]):
        d[a].append(b)
    d['mainline'].append(parts[1])
print("TIME:", timeit.default_timer() - start)

#### DF cleaning and split san into list of moves (remove decimals)

In [None]:
# Build the frame from the column-wise dict; wrapping each list in a Series
# lets pandas pad columns of unequal length with NaN.
df = pd.DataFrame({k: pd.Series(v) for k, v in d.items()})

# Clean the header names first (strip surrounding spaces, then surrounding '[').
# The rename has to happen before 'san' is added: assigning a name list that is
# one entry shorter than the frame's columns raises a length-mismatch ValueError.
columns = [x.strip(' ').strip('[') for x in df.columns]
df.columns = columns

# Tokenise the move text on whitespace and drop every token containing '.'
# (presumably the move-number markers such as "1.").
df['san'] = df['mainline'].apply(lambda x: [i for i in x.split() if "." not in i])
print(df.shape)
df.head(2)

## Function to get the score and spread (square root of the weighted variance) for each move in san, then split by white and black

In [None]:
def get_sd_score_from_san(san):
    """Score every position of a game and measure the spread of its neighbors.

    Each move in ``san`` is pushed in turn. After each move the position is
    analysed with ``engine.analyse`` under ``chess.engine.Limit(time=0.05)``,
    and the count-weighted standard deviation of the ``'score'`` attribute over
    the position's neighbors in the global graph ``g`` is computed, weighted by
    each neighbor's ``'count'`` attribute.

    Processing stops at the first move for which any step fails (unplayable
    token, position not in ``g``, missing attribute, undefined variance). The
    engine score is recorded before the spread is computed, so on such a
    failure the score lists can hold one more entry than the sd lists.

    Args:
        san: Iterable of SAN move strings, in game order.

    Returns:
        ``pd.Series`` of four lists, in this order: white sd, black sd, white
        score, black score. Entries at even positions (moves 1, 3, ...) go to
        white and odd positions to black.
    """
    board = chess.Board()
    sd_list = []
    score_list = []
    for move in san:
        try:
            board.push_san(move)
            currFen = board.fen()
            # Continue from a fresh Board built from the FEN just produced.
            board = chess.Board(currFen)
            info = engine.analyse(board, chess.engine.Limit(time=0.05))

            score_text = str(info['score'])
            if '#' in score_text:
                # NOTE(review): ordinary scores are negated below but these
                # mate sentinels are not -- confirm the sign is intended.
                sf = -39765 if '-' in score_text else 39765
            else:
                sf = int(score_text) * -1
            score_list.append(sf)

            scores = []
            counts = []
            for neighbor in g.neighbors(currFen):
                # Read the attributes straight from the node's own data dict.
                # nx.get_node_attributes(g, ...) builds a dict over every node
                # in the graph, which was being repeated twice per neighbor.
                attrs = g.nodes[neighbor]
                scores.append(attrs['score'])
                counts.append(attrs['count'])

            sd_list.append(np.sqrt(varWeighted(scores, counts)))
        except Exception:
            # Best-effort: stop at the first failing move, but let
            # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
            break
    return pd.Series([sd_list[::2], sd_list[1::2], score_list[::2], score_list[1::2]])

#### Build a small dataframe to test on... this is super slow

In [4]:
# Take the first 2000 games as an independent copy: new columns are assigned to
# df_test below, and writing to a slice of df is ambiguous view-vs-copy
# behaviour in pandas.
df_test = df.head(2000).copy()

NameError: name 'df' is not defined

In [None]:
# Time the per-game scoring of the test subset. Each game yields four lists
# (white sd, black sd, white score, black score), which become four new columns.
start = timeit.default_timer()
result_columns = ['sd_white', 'sd_black', 'score_white', 'score_black']
df_test[result_columns] = df_test['san'].apply(get_sd_score_from_san)
print("TIME:", timeit.default_timer() - start)
df_test.head(2)

#### Write df to pickle

In [None]:
#df_test.to_pickle("./DF_sd_sf_2000.pkl")