In [1]:
import re
from collections import defaultdict, OrderedDict

import numpy as np
import pandas as pd
import chess

In [2]:
# Load the raw puzzle export; header=None makes pandas label the columns 0, 1, 2, ...
df = pd.read_csv(
    filepath_or_buffer="../data/lichess_db_puzzle.csv",
    header=None,
)

In [3]:
# Keep only the first 1000 rows for the rest of the notebook.
df = df.head(1000)

In [4]:
# Give the positional columns (0..8) readable names.
df = df.rename(
    columns=dict(
        enumerate(
            [
                "id",
                "fen",
                "moves",
                "rating",
                "rating deviation",
                "popularity",
                "nbplays",
                "themes",
                "gameurl",
            ]
        )
    )
)

In [5]:
def moves_to_san(x):
    """Convert a puzzle's UCI solution into standard algebraic notation (SAN).

    Parameters
    ----------
    x : mapping (for example a DataFrame row)
        Must provide ``x["fen"]`` (the starting position) and ``x["moves"]``
        (the solution as whitespace-separated UCI moves).

    Returns
    -------
    tuple of (str, str, str)
        ``(san_moves, final_fen, fen_after_first_move)``:

        * ``san_moves`` -- the solution in SAN, moves separated by a single
          space, with no leading or trailing space.
        * ``final_fen`` -- FEN of the position after all moves were played.
        * ``fen_after_first_move`` -- FEN after the first move only. That
          move is played by the computer, so this is the position the
          solver actually starts from.

    Raises
    ------
    ValueError
        If ``x["moves"]`` contains no moves. Malformed or illegal moves are
        reported by python-chess itself.
    """
    # Temporary board used to replay the solution.
    board = chess.Board(x["fen"])

    # split() without an argument tolerates repeated/trailing whitespace.
    uci_moves = x["moves"].split()
    if not uci_moves:
        raise ValueError("puzzle has no moves")

    san_moves = []
    fen_after_first_move = None

    for uci in uci_moves:
        move = chess.Move.from_uci(uci)
        # SAN is position dependent: compute it before the move is pushed.
        san_moves.append(board.san(move))
        board.push(move)

        # Remember the position right after the first move.
        if fen_after_first_move is None:
            fen_after_first_move = board.fen()

    return " ".join(san_moves), board.fen(), fen_after_first_move

In [6]:
# Replay every puzzle once, then store the SAN moves and the two FENs as columns
san_rows = df.apply(moves_to_san, axis=1)
moves_alg, final_fen, fen_after_first_move = zip(*san_rows)
df["moves_alg"] = moves_alg
df["final_fen"] = final_fen
df["fen_after_first_move"] = fen_after_first_move

In [7]:
# Split the FEN after the first move (not the final FEN) into its six fields
df[
    [
        "placement",
        "active",
        "castling",
        "enpassant",
        "halfmove_clock",
        "fullmove_clock",
    ]
] = df["fen_after_first_move"].str.split(pat=" ", expand=True)

In [8]:
def pieces_and_positions(x):
    """Count the pieces on the board and list the squares of every piece.

    Reads the FEN piece-placement field from ``x["placement"]`` and returns
    the tuple ``(number_pieces, positions)``:

    * ``number_pieces`` -- how many piece letters occur in the placement.
    * ``positions`` -- dict mapping a piece symbol (``"P"``, ``"k"``, ...) to
      the list of square names it occupies. Keys are ordered from white pawn
      to black king (P, R, B, N, Q, K, p, r, b, n, q, k); the squares inside
      each list follow the scan order a1..a8, b1..b8, ..., h1..h8.
    """
    fen_placement = x["placement"]

    # Every piece is exactly one letter in the placement string.
    number_pieces = sum(1 for char in fen_placement if char in "pPrRbBnNqQkK")

    # Temporary board built from the placement field.
    board = chess.Board(fen_placement)

    # Scan file by file and group the occupied square names by piece symbol.
    squares_by_piece = {}
    for file_index in range(8):
        for rank_index in range(8):
            square = chess.square(file_index, rank_index)
            piece = board.piece_at(square)
            if piece:
                squares_by_piece.setdefault(str(piece), []).append(
                    chess.square_name(square)
                )

    # Output key order: white pawn first, black king last.
    output_rank = {symbol: index for index, symbol in enumerate("PRBNQKprbnqk")}
    ordered_symbols = sorted(squares_by_piece, key=output_rank.__getitem__)
    positions = {symbol: squares_by_piece[symbol] for symbol in ordered_symbols}

    return number_pieces, positions

In [9]:
# Per puzzle: how many pieces are on the board and where each one stands
piece_rows = df.apply(pieces_and_positions, axis=1)
number_of_pieces, positions = zip(*piece_rows)
df["number_of_pieces"] = number_of_pieces
df["positions"] = positions

In [10]:
# Remove the columns that are not exported
df = df.drop(
    labels=[
        "id",
        "fen",
        "final_fen",
        "rating deviation",
        "popularity",
        "nbplays",
        "halfmove_clock",
        "fullmove_clock",
    ],
    axis="columns",
)

In [11]:
# Number the remaining puzzles 1..N (replaces the original id dropped above)
df["id"] = np.arange(len(df)) + 1

In [None]:
# Export the processed puzzles as CSV, without the DataFrame index
df.to_csv(
    path_or_buf="../data/puzzles_alg.csv",
    index=False,
)

In [12]:
# Export the processed puzzles as JSON, one object per puzzle
df.to_json(
    path_or_buf="../data/puzzles_alg.json",
    orient="records",
)

# Testing

In [None]:
# Show the first five processed rows
df.head(n=5)

In [None]:
# Placement field of the first puzzle
placement = df["placement"].iloc[0]

In [None]:
# Build a board from that placement field for manual inspection
board = chess.Board(fen=placement)