In [4]:
import chess
import chess.engine
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import pandas as pd
from chess import Board, Move
from joblib import Parallel, delayed


In [33]:
# Load the puzzle table, using the first CSV column as the DataFrame index.
# To process the other puzzle file instead, point read_csv at
# 'data/lichess_db_puzzle.csv' (same index_col=0).
dataset = pd.read_csv('data/test_data_set.csv', index_col=0)

In [34]:
# Preview the first rows to confirm the expected columns were loaded.
dataset.head()

Unnamed: 0_level_0,FEN,Moves
PuzzleId,Unnamed: 1_level_1,Unnamed: 2_level_1
p0003,2R5/3rq1k1/p6p/1p3pNQ/Pb2p3/4P2P/5PP1/6K1 w - ...,c8e8 e7g5 e8g8 g7g8
p0027,7r/2b2pk1/Q4np1/p2pp1R1/qp3P2/1N6/1PP4P/4R2K b...,e5f4 g5g6 g7g6 e1g1 g6f5 b3d4
p0025,5qk1/p1pR1p2/1b3Rp1/4r3/1r6/7P/PP4P1/1B1Q3K b ...,e5e7 b1g6 e7d7 d1d7
p0029,4rrk1/pp3pp1/2p5/6qb/P1PP4/4nRB1/1P1QN1B1/R5K1...,a1a3 h5f3 g2f3 f7f5
p0031,4r1k1/1qr3Bp/p3pPp1/1p2Q3/8/2P2p2/PP1R1PPP/6K1...,e5e6 e8e6 d2d8 e6e8


In [35]:
# Point value per piece-type code. Codes without an entry (e.g. the king)
# are treated as worth 0 by ``material_value``.
material_value_dict = {
    1: 1,  # pawn
    2: 3,  # knight
    3: 3,  # bishop
    4: 5,  # rook
    5: 9,  # queen
}


def material_value(piece_type):
    """Return the point value for ``piece_type``.

    Looks the code up in ``material_value_dict``; any code that is not
    listed there (the king, or anything unexpected) is worth 0.
    """
    if piece_type in material_value_dict:
        return material_value_dict[piece_type]
    return 0


# Optimized helper functions
def _material_by_side(board):
    """Return ``(side_to_move_total, other_side_total)`` material on ``board``."""
    side_to_move_total = 0
    other_side_total = 0
    for piece in board.piece_map().values():
        value = material_value(piece.piece_type)
        if piece.color == board.turn:
            side_to_move_total += value
        else:
            other_side_total += value
    return side_to_move_total, other_side_total


def process_row(fen, moves, puzzle_id, ranking=None):
    """Extract the feature dict for a single puzzle.

    The first move in ``moves`` is played on the board before any feature is
    measured (presumably the setup move that precedes the puzzle proper --
    confirm against the data source), so "current player" below always means
    the side to move *after* that first move.

    Args:
        fen: Starting position as a FEN string.
        moves: Whitespace-separated UCI moves; at least two are required.
        puzzle_id: Identifier stored under ``"PuzzleId"`` in the result.
        ranking: Optional target value; stored under ``"Ranking"`` only when
            it is not ``None``.

    Returns:
        dict: feature name -> value. Keys are inserted in a fixed order, which
        determines the column order of a DataFrame built from these dicts.

    Raises:
        ValueError: if fewer than two moves are supplied, or if the second
            move starts from an empty square. The message names the puzzle so
            a bad row can be located when running under joblib.
    """
    board = Board(fen)

    # split() rather than split(" "): doubled or trailing whitespace would
    # otherwise yield empty tokens that fail UCI parsing.
    moves_list = moves.split()
    if len(moves_list) < 2:
        raise ValueError(
            f"Puzzle {puzzle_id!r}: expected at least 2 moves, "
            f"got {len(moves_list)} in {moves!r}"
        )
    parsed_moves = [Move.from_uci(uci) for uci in moves_list]
    first_move, second_move = parsed_moves[0], parsed_moves[1]
    board.push(first_move)

    result = {}

    # Features of the position after the first move.
    result["moves_count"] = len(moves_list)  # counts the first move too
    result["pieces_count"] = len(board.piece_map())

    current_material, next_material = _material_by_side(board)
    result["material_current_player"] = current_material
    result["material_next_player"] = next_material
    # NOTE: the key is misspelled ("meteral"); it is kept as-is because it
    # becomes a column name that downstream consumers may already rely on.
    result["meteral_difference"] = current_material - next_material

    moved_piece = board.piece_at(second_move.from_square)
    if moved_piece is None:
        raise ValueError(
            f"Puzzle {puzzle_id!r}: move {moves_list[1]!r} starts from an empty square"
        )
    result["moved_piece_type"] = moved_piece.symbol().lower()

    # Piece activity: legal-move counts just before and just after the second move.
    result["legal_moves_current_player"] = len(list(board.legal_moves))
    board.push(second_move)
    result["legal_moves_next_player"] = len(list(board.legal_moves))

    # Play out the remaining moves.
    for move in parsed_moves[2:]:
        board.push(move)

    # Features of the final position. "current"/"next" are relative to the
    # side to move at the end, which depends on how many moves were played.
    final_current_material, final_next_material = _material_by_side(board)
    result["material_after_moves_current_player"] = final_current_material
    result["material_after_moves_next_player"] = final_next_material
    # NOTE(review): despite the key name this records checkmate, not check.
    result["ends_with_check"] = board.is_checkmate()

    result["PuzzleId"] = puzzle_id

    if ranking is not None:
        result["Ranking"] = ranking

    return result


# Main function
def create_features_parallel(dataset, train=True, n_jobs=-1):
    """Build the per-puzzle feature table by running ``process_row`` on every row.

    Args:
        dataset: DataFrame indexed by puzzle id with ``"FEN"`` and ``"Moves"``
            columns, plus a ``"Rating"`` column when ``train`` is true.
        train: When true, each row's ``"Rating"`` is forwarded to
            ``process_row``; otherwise ``None`` is passed instead.
        n_jobs: Worker count handed to ``joblib.Parallel``.

    Returns:
        DataFrame indexed by ``"PuzzleId"`` where ``moved_piece_type`` is
        one-hot encoded over a fixed set of six piece letters and
        ``ends_with_check`` is cast to int.
    """
    tasks = (
        delayed(process_row)(
            row["FEN"],
            row["Moves"],
            puzzle_id,
            row["Rating"] if train else None,
        )
        for puzzle_id, row in dataset.iterrows()
    )
    rows = Parallel(n_jobs=n_jobs)(tasks)

    features = pd.DataFrame(rows).set_index("PuzzleId")

    # A fixed category list makes get_dummies emit all six piece columns,
    # even for pieces that never appear in this particular dataset.
    piece_dtype = pd.CategoricalDtype(categories=["b", "k", "n", "p", "q", "r"])
    features["moved_piece_type"] = features["moved_piece_type"].astype(piece_dtype)

    features = pd.get_dummies(features, columns=["moved_piece_type"], dtype=int)
    features["ends_with_check"] = features["ends_with_check"].astype(int)

    return features


In [36]:
# train=False: the "Rating" column is not read from `dataset`, so the
# resulting feature table carries no "Ranking" column.
X = create_features_parallel(dataset, train=False)

In [39]:
# Persist the features; index=True writes the PuzzleId index as the first CSV column.
X.to_csv('data/test_features_v2.csv', index=True)