In [1]:
%%capture capt
!pip install matplotlib
!pip install seaborn
!pip install scikit-learn
!pip install chess

In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from collections import Counter
import time
import seaborn as sns
import math

import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

import chess
import chess.engine

In [3]:
def pretty_print(number):
    formatted_number = "{:,}".format(number)
    print(formatted_number)

# Load data

In [4]:
%run S3.ipynb

In [5]:
#moves_df = pd.read_csv("../Data/moves_df.csv")
moves_df = open_csv("moves_df_rnd.csv")

In [6]:
moves_df

Unnamed: 0,evaluation,fen
0,35,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...
1,48,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...
2,111,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...
3,47,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...
4,52,r1bqkbnr/pppp1ppp/2n5/1B2p3/4P3/5N2/PPPP1PPP/R...
...,...,...
9117278,366,r7/5bk1/4P2b/p1p4p/P1P1pprP/NP6/R7/1NB2K1R w -...
9117279,-109,r7/5bk1/4P2b/p1p4p/P1P1pprP/NP6/5R2/1NB2K1R b ...
9117280,396,8/5bk1/r3P2b/p1p4p/P1P1pprP/NP6/5R2/1NB2K1R w ...
9117281,-210,8/5bk1/r3P2b/p1p4p/P1P1pprP/NP5R/5R2/1NB2K2 b ...


# RandomPlay to add data

In [7]:
# we calculated the average length of a game before
# as it takes a while to compute, we keep it as a constant
mean_game_length = 76

### Evaluate chess positions

In [8]:
# functions to evaluate a given chess position, using a chess engine named stockfish

TIME_LIMIT = 0.3

def stockfish_evaluation(board, engine, time_limit = TIME_LIMIT):
    #returns the score of the position, from the perspective of the white player
    #we will never change perspective throughout our work 
    #engine = chess.engine.SimpleEngine.popen_uci("../../Stockfish/linux/stockfish/stockfish-ubuntu-x86-64-avx2")
    result = engine.analyse(board, chess.engine.Limit(time=time_limit))
    #engine.close()
    return result['score'].white()

def position_eval(board, engine, time_limit = TIME_LIMIT):
    # we need to consider scores where the engine has found a way to deliver mate differently
    # because is those cases, the score returned is a string, not an integer
    score = stockfish_evaluation(board, engine, time_limit)
    if not score.is_mate():
        return score.score()
    #the engine has found a way to mate in a certain number of moves
    return mateScore(score)

def findNumberOfMovesBeforeMate(score):
    str_nb_moves_before_mate = ''
    s = str(score)
    i = len(s)-1
    while i >= 0 and s[i].isnumeric():
        str_nb_moves_before_mate = s[i] + str_nb_moves_before_mate
        i -= 1
    return int(str_nb_moves_before_mate)

def mateScore(score):
    nb_moves_before_mate = findNumberOfMovesBeforeMate(score)
    white_is_winning = (str(score.wdl())[9] == '1')
    # a.wdl() gives the probability of winning for white
    # if the 9th character is equal to 1, then white will deliver mate shortly
    # otherwise, white will lose soon and black is winning
    score_for_mate = 10000 - nb_moves_before_mate*100
    if not white_is_winning:
        score_for_mate = (-1)*score_for_mate
    return score_for_mate

### Random Play

In [9]:
def random_move(board):
    legal_moves = list(board.legal_moves)
    move = np.random.choice(legal_moves)
    return move

def random_game(length=mean_game_length):
    board = chess.Board()
    engine = chess.engine.SimpleEngine.popen_uci("../../Stockfish/linux/stockfish/stockfish-ubuntu-x86-64-avx2")
    positions = np.empty((length, 2), dtype=object)
    for i in range(length):
        move = random_move(board)
        board.push(move)
        eval = position_eval(board, engine)
        fen = board.fen()
        to_insert = [eval, fen]
        positions[i] = to_insert
        if board.is_game_over():
            engine.close()
            return positions[:i+1]
    engine.close()
    return positions

### Multiprocessing

In [10]:
import multiprocessing
from multiprocessing import Pool

processes_in_parallel = multiprocessing.cpu_count() 
print(f"Processes in parallel -> {processes_in_parallel}")

Processes in parallel -> 104


In [11]:
def save(moves_evaluations):
    df = pd.DataFrame(moves_evaluations, columns=moves_df.columns)
    to_csv(df, "moves_df_rnd.csv")
    return

In [12]:
moves_evaluations = moves_df.values
pretty_print(len(moves_evaluations))

9,117,283


In [20]:
games_added = 0
cur_cycle_games_added = 0
games_to_add = 1000

In [21]:
if __name__ == "__main__":
    with tqdm(total=games_to_add) as pbar:
        while games_added < games_to_add:
            with Pool() as p:
                async_results = [p.apply_async(random_game, args=(mean_game_length, )) for _ in range(processes_in_parallel)]
                
                for i in range(processes_in_parallel):
                    random_positions = async_results[i].get()
                    moves_evaluations = np.insert(moves_evaluations, len(moves_evaluations), random_positions, axis=0)
                    games_added += 1
                    cur_cycle_games_added += 1
                    pbar.update(1)
    
            p.close()
            p.join()
            if cur_cycle_games_added >= 5000:
                save(moves_evaluations)
                cur_cycle_games_added = 0
save(moves_evaluations)

1040it [11:58,  1.45it/s]                           
