In [120]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import KFold, cross_val_score, train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, mean_absolute_error, mean_squared_error
from xgboost import XGBClassifier, XGBRegressor
import warnings
import chess.engine
import chess.svg
from IPython.display import display, SVG, clear_output
import time
from tqdm import tqdm
from stockfish import Stockfish
import chess.pgn
import chess
import mmh3

In [227]:
# Utility functions

def compute_material_imbalance(board):
    piece_values = {chess.PAWN: 1, chess.KNIGHT: 3, chess.BISHOP: 3, chess.ROOK: 5, chess.QUEEN: 9}
    white_material = 0
    black_material = 0

    # Initialize variables to keep track of material counts for both sides
    white_material = 0
    black_material = 0

    # Iterate through the board and calculate material values for both sides
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None:
            if piece.color == chess.WHITE:
                white_material += piece_values.get(piece.piece_type, 0)
            else:
                black_material += piece_values.get(piece.piece_type, 0)
    return white_material - black_material

def encoded_board(board):
    # Initialize a 64x64 vector with -1 for empty squares
    board_vector = [-1] * 64

    # Iterate through the board and populate the vector with piece encodings
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None:
            # Encode the piece based on its type
            if piece.color == chess.WHITE:
                board_vector[square] = piece.piece_type
            else:
                # For black pieces, encode as negative values
                board_vector[square] = -piece.piece_type
    return board_vector

def count_pieces(board):
    white_piece_count = 0
    black_piece_count = 0

    # Iterate through the board to count pieces for both sides
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None:
            if piece.color == chess.WHITE:
                white_piece_count += 1
            else:
                black_piece_count += 1

    return white_piece_count, black_piece_count

def number_of_pawns(board):
    white_pawn_count = 0
    black_pawn_count = 0

    # Iterate through the board to count pawns for both sides
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None and piece.piece_type == chess.PAWN:
            if piece.color == chess.WHITE:
                white_pawn_count += 1
            else:
                black_pawn_count += 1

    return white_pawn_count, black_pawn_count

def number_of_knights(board):
    white_knight_count = 0
    black_knight_count = 0

    # Iterate through the board to count knights for both sides
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None and piece.piece_type == chess.KNIGHT:
            if piece.color == chess.WHITE:
                white_knight_count += 1
            else:
                black_knight_count += 1

    return white_knight_count, black_knight_count

def number_of_bishops(board):
    white_bishop_count = 0
    black_bishop_count = 0

    # Iterate through the board to count bishops for both sides
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None and piece.piece_type == chess.BISHOP:
            if piece.color == chess.WHITE:
                white_bishop_count += 1
            else:
                black_bishop_count += 1

    return white_bishop_count, black_bishop_count

def number_of_rooks(board):
    white_rook_count = 0
    black_rook_count = 0

    # Iterate through the board to count rooks for both sides
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None and piece.piece_type == chess.ROOK:
            if piece.color == chess.WHITE:
                white_rook_count += 1
            else:
                black_rook_count += 1

    return white_rook_count, black_rook_count

def number_of_queens(board):
    white_queen_count = 0
    black_queen_count = 0

    # Iterate through the board to count queens for both sides
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None and piece.piece_type == chess.QUEEN:
            if piece.color == chess.WHITE:
                white_queen_count += 1
            else:
                black_queen_count += 1

    return white_queen_count, black_queen_count


def hash_pawn_positions(board):
    white_pawn_positions = []
    black_pawn_positions = []

    # Iterate through the board's squares to find pawn positions
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None and piece.piece_type == chess.PAWN:
            if piece.color == chess.WHITE:
                white_pawn_positions.append(chess.square_name(square))
            else:
                black_pawn_positions.append(chess.square_name(square))

    # Concatenate the positions into a string
    concatenated_positions = ",".join(white_pawn_positions) + "|" + ",".join(black_pawn_positions)

    # Hash the string using mmh3
    hashed_value = mmh3.hash(concatenated_positions)
    print(hashed_value)

    return hashed_value

def number_of_pieces(board):
    # Initialize a dictionary to hold the counts
    piece_counts = {}

    # Iterate through the board and count each piece
    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece is not None:
            piece_counts[piece.symbol()] = piece_counts.get(piece.symbol(), 0) + 1

    return piece_counts

def generate_feature_vector(board):
    # Initialize an empty list to hold the feature vector
    feature_vector = []

    # Add whose turn it is to the feature vector
    feature_vector.append(int(board.turn))
    
    # Add the material imbalance to the feature vector
    material_imbalance = compute_material_imbalance(board)
    feature_vector.append(material_imbalance)

    # Encode the board and append it to the feature vector
    board_vector = encoded_board(board)
    feature_vector.extend(board_vector)

    # Check or Checkmate situation
    feature_vector.append(int(board.is_check()))
    feature_vector.append(int(board.is_checkmate()))

    # Castling rights
    feature_vector.append(int(board.has_kingside_castling_rights(chess.WHITE)))
    feature_vector.append(int(board.has_queenside_castling_rights(chess.WHITE)))
    feature_vector.append(int(board.has_kingside_castling_rights(chess.BLACK)))
    feature_vector.append(int(board.has_queenside_castling_rights(chess.BLACK)))

    # Add the king positions to the feature vector
    white_king_square = board.king(chess.WHITE)
    black_king_square = board.king(chess.BLACK)
    feature_vector.append(white_king_square)
    feature_vector.append(black_king_square)

    # Add the number of pieces left to the feature vector
    white_count, black_count = count_pieces(board)
    feature_vector.append(white_count)
    feature_vector.append(black_count)

    # Add the number of pawns left to the feature vector
    white_pawn_count, black_pawn_count = number_of_pawns(board)
    feature_vector.append(white_pawn_count)

    # Add the number of knights left to the feature vector
    white_knight_count, black_knight_count = number_of_knights(board)
    feature_vector.append(white_knight_count)

    # Add the number of bishops left to the feature vector
    white_bishop_count, black_bishop_count = number_of_bishops(board)
    feature_vector.append(white_bishop_count)

    # Add the number of rooks left to the feature vector
    white_rook_count, black_rook_count = number_of_rooks(board)
    feature_vector.append(white_rook_count)

    # Add the number of queens left to the feature vector
    white_queen_count, black_queen_count = number_of_queens(board)
    feature_vector.append(white_queen_count)



    return np.array(feature_vector)

stockfish = Stockfish(path="datasets/chess/stockfish", depth=1)

def get_stockfish_eval(fen_str):
    stockfish.set_fen_position(fen_str)
    score = stockfish.get_evaluation()['value']
    return score

In [196]:
# Create a list to store FEN strings
lst_of_feature_vectors = []
lst_of_fens = []
X = []
y = []
limit = 50000

# Open the PGN file
with open("datasets/chess/lichess_db_standard_rated_2013-02.pgn") as pgn_file:
    count = 0
    while True:
        # Read a game from the PGN file
        game = chess.pgn.read_game(pgn_file)
        
        # Break if no more games are in the file
        if game is None:
            break

        board = game.board()
        for move in game.mainline_moves():
            board.push(move)
            feature_vector = generate_feature_vector(board)
            lst_of_feature_vectors.append(feature_vector)
            lst_of_fens.append(board.fen())
            
        if len(lst_of_feature_vectors) >= limit:
            break

print(len(lst_of_feature_vectors), len(lst_of_fens))

for idx in tqdm(range(len(lst_of_fens))):
    X.append(lst_of_feature_vectors[idx])
    y.append(get_stockfish_eval(lst_of_fens[idx]))


X = np.array(X)
y = np.array(y)

print(X.shape, y.shape)
print(min(y), max(y))
data = np.concatenate([X, y.reshape(-1, 1)], axis=1)
data = pd.DataFrame(data)
print(data.head())
print(data.shape)
data.to_csv("datasets/chess/lichess_eval.csv", index=False, header=False)




50021 50021


100%|██████████| 50021/50021 [01:43<00:00, 482.43it/s]


(50021, 81) (50021,)
-4864 5406
   0   1   2   3   4   5   6   7   8   9   ...  72  73  74  75  76  77  78  \
0   0   0   4   2   3   5   6   3   2   4  ...   4  60  16  16   8   2   2   
1   1   0   4   2   3   5   6   3   2   4  ...   4  60  16  16   8   2   2   
2   0   1   4   2   3   5   6   3   2   4  ...   4  60  16  15   8   2   2   
3   1   1   4   2   3   5   6   3   2   4  ...   4  60  16  15   8   2   2   
4   0   2   4   2   3   5   6   3   2   4  ...   4  60  16  14   8   2   2   

   79  80   81  
0   2   1   47  
1   2   1    9  
2   2   1   51  
3   2   1   78  
4   2   1  112  

[5 rows x 82 columns]
(50021, 82)


In [197]:
def read_processed_data(filename):
    data = pd.read_csv(filename)
    data.dropna(inplace=True)
    print(data.head())
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    return X, y

X, y = read_processed_data('datasets/chess/lichess_eval.csv')

print(f'Shapes of X and y: {X.shape}, {y.shape}')
print(f'Max and min of y: {max(y)}, {min(y)}')

   0  0.1  4  2  3  5  6  3.1  2.1  4.1  ...  4.2  60  16  16.1  8  2.2  2.3  \
0  1    0  4  2  3  5  6    3    2    4  ...    4  60  16    16  8    2    2   
1  0    1  4  2  3  5  6    3    2    4  ...    4  60  16    15  8    2    2   
2  1    1  4  2  3  5  6    3    2    4  ...    4  60  16    15  8    2    2   
3  0    2  4  2  3  5  6    3    2    4  ...    4  60  16    14  8    2    2   
4  1    1  4  2  3  5  6    3    2    4  ...    4  60  15    14  7    2    2   

   2.4  1.12   47  
0    2     1    9  
1    2     1   51  
2    2     1   78  
3    2     1  112  
4    2     1  174  

[5 rows x 82 columns]
Shapes of X and y: (50020, 81), (50020,)
Max and min of y: 5406, -4864


In [207]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

print(X_train.shape, y_train.shape)

n_estimator = 500
learning_rate = 0.2
max_depth = 10
subsample = 0.8

model = XGBRegressor(n_estimators=n_estimator, learning_rate=learning_rate, max_depth=max_depth, n_jobs=-1, subsample=subsample)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(mean_absolute_error(y_test, y_pred))

(40016, 81) (40016,)
116.45677619579075


In [209]:
# test 
fen_str = "6R1/1k4P1/2P1p3/5p2/8/4P1r1/3K4/8 b - - 0 50"
board = chess.Board(fen_str)
feature_vector = generate_feature_vector(board)
feature_vector = np.array(feature_vector)
score = model.predict(feature_vector.reshape(1, -1))
print(score)
engine_score = get_stockfish_eval(fen_str)
print(engine_score)


[243.47675]
193


In [233]:
#engine = chess.engine.SimpleEngine.popen_uci("datasets/chess/stockfish.exe")
# Your evaluation function
def evaluate_board(board):
    if board.is_game_over():
        if board.result() == "1-0":
            return 10000
        elif board.result() == "0-1":
            return -10000
        else:
            return 0
    feature_vector = generate_feature_vector(board)
    return model.predict(feature_vector.reshape(1, -1))
    # score = get_stockfish_eval(board.fen())
    # return score

# Alpha-Beta pruning with evaluation threshold
def minimax(board, depth, alpha, beta, maximizing, threshold):
    if depth == 0 or board.is_game_over():
        return evaluate_board(board)
    
    if maximizing:
        max_eval = float('-inf')
        for move in board.legal_moves:
            board.push(move)
            eval = minimax(board, depth-1, alpha, beta, False, threshold)
            board.pop()
            max_eval = max(max_eval, eval)
            alpha = max(alpha, eval)
            if beta <= alpha:
                break
            if max_eval >= threshold:
                break
        return max_eval
    else:
        min_eval = float('inf')
        for move in board.legal_moves:
            board.push(move)
            eval = minimax(board, depth-1, alpha, beta, True, threshold)
            board.pop()
            min_eval = min(min_eval, eval)
            beta = min(beta, eval)
            if beta <= alpha:
                break
            if min_eval <= -threshold:
                break
        return min_eval

# Find the best move using minimax with alpha-beta pruning and evaluation threshold
def get_best_move(board, depth):
    a, b = count_pieces(board)
    x = a + b
    if x >= 28:
        threshold = 200
    else:
        threshold = float('inf')

    if board.turn == chess.WHITE:  # If it's white's turn
        best_eval = float('-inf')
        maximizing = True
    else:  # If it's black's turn
        best_eval = float('inf')
        maximizing = False

    best_move = None
    for move in board.legal_moves:
        board.push(move)
        move_eval = minimax(board, depth-1, float('-inf'), float('inf'), not maximizing, threshold)
        board.pop()
        
        if maximizing and move_eval > best_eval:
            best_eval = move_eval
            best_move = move
        elif not maximizing and move_eval < best_eval:
            best_eval = move_eval
            best_move = move

    return best_move

# Initialize 
depth = 3
threshold = 1000  # You can set this to the value that you consider a "won" or "lost" game
num_moves = 100

# Initialize an empty PGN
game = chess.pgn.Game()
# Initialize board
board = chess.Board()
# Create a node to represent the game's starting position
node = game.add_variation(chess.Move.from_uci("0000"))
# Play num_moves moves
for i in range(num_moves):
    if board.is_game_over():
        break
    best_move = get_best_move(board=board, depth=depth)
    board.push(best_move)
    node = node.add_variation(best_move)

    # Clear output
    clear_output(wait=True)
    # Display the board
    display(SVG(chess.svg.board(board=board, size=350)))
    print(f'FEN string: {board.fen()}')
    print(f'Stockfish evaluation: {get_stockfish_eval(board.fen())}')
    print(f'Model evaluation: {model.predict(generate_feature_vector(board).reshape(1, -1))}')
    time.sleep(0.1)

exporter = chess.pgn.StringExporter()
pgn_string = game.accept(exporter)


print("PGN of the game:")
print(pgn_string)


KeyboardInterrupt: 