In [38]:
import re
from collections import defaultdict, OrderedDict

import numpy as np
import pandas as pd
import chess

In [2]:
# Load the raw Lichess puzzle file; header=None means no row of the file is used as column names
df = pd.read_csv("../data/lichess_db_puzzle.csv", header=None)

In [3]:
# Work on the first 100 puzzles only
df = df.head(100)

In [4]:
# Give the positional columns 0..8 readable names
column_names = ["id", "fen", "moves", "rating", "rating deviation", "popularity", "nbplays", "themes", "gameurl"]
df = df.rename(columns=dict(enumerate(column_names)))

In [5]:
def moves_to_san(x):
    """Convert a puzzle's UCI move sequence into algebraic notation (SAN).

    Parameters
    ----------
    x : mapping (for example a DataFrame row)
        Must provide ``x["fen"]`` (the starting position) and ``x["moves"]``
        (the solution as whitespace-separated UCI moves).

    Returns
    -------
    tuple of (str, str, str)
        ``(san_moves, final_fen, fen_after_first_move)`` where ``san_moves``
        is the SAN moves joined by single spaces (no leading or trailing
        space), ``final_fen`` is the FEN after every move has been played and
        ``fen_after_first_move`` is the FEN after only the first move.

    Raises
    ------
    ValueError
        If ``x["moves"]`` contains no move at all.
    """
    # temporary board used to replay the solution
    board = chess.Board(x["fen"])

    # split() tolerates repeated or surrounding whitespace between moves
    uci_moves = x["moves"].split()
    if not uci_moves:
        raise ValueError("puzzle contains no moves to convert")

    san_moves = []
    # the first solution move is played by the cpu, so the position right
    # after it is recorded separately
    fen_after_first_move = None

    for uci in uci_moves:
        next_move = chess.Move.from_uci(uci)
        # the notation is taken from the position before the move is played
        san_moves.append(board.san(next_move))
        board.push(next_move)

        if fen_after_first_move is None:
            fen_after_first_move = board.fen()

    # join avoids the leading space produced by repeated `" " + san` concatenation
    return " ".join(san_moves), board.fen(), fen_after_first_move

In [6]:
# Replay every puzzle once and store the SAN moves, the final FEN and the FEN after the first move
solved_rows = df.apply(moves_to_san, axis=1)
moves_alg, final_fen, fen_after_first_move = zip(*solved_rows)
df["moves_alg"] = moves_alg
df["final_fen"] = final_fen
df["fen_after_first_move"] = fen_after_first_move

In [7]:
# Break the FEN reached after the first move into its space-separated fields
fen_fields = ["placement", "active", "castling", "enpassant", "halfmove_clock", "fullmove_clock"]
df[fen_fields] = df["fen_after_first_move"].str.split(" ", expand=True)

In [43]:
def pieces_and_positions(x):
    """Count the pieces of a FEN placement and group their squares by piece symbol.

    Reads ``x["placement"]`` (the piece-placement field of a FEN string) and
    returns ``(number_pieces, positions)``. ``positions`` is a dict mapping
    each piece symbol found on the board to a list of square names. Keys are
    ordered white P, R, B, N, Q, K followed by black p, r, b, n, q, k; the
    squares of one piece are listed file by file (a1..a8, b1..b8, ...).
    """
    fen_placement = x["placement"]

    # temporary board built from the placement field only
    board = chess.Board(fen_placement)

    # every piece is one of these twelve letters in the placement field
    number_pieces = len(re.findall(r"[pPrRbBnNqQkK]", fen_placement))

    # walk the board file by file and collect the occupied squares per symbol
    squares_by_symbol = {}
    for file_index in range(8):
        for rank_index in range(8):
            square = chess.square(file_index, rank_index)
            piece = board.piece_at(square)
            if piece:
                squares_by_symbol.setdefault(str(piece), []).append(chess.square_name(square))

    # emit the symbols from white pawn to black king, skipping absent pieces
    positions = {
        symbol: squares_by_symbol[symbol]
        for symbol in "PRBNQKprbnqk"
        if symbol in squares_by_symbol
    }

    return number_pieces, positions

In [44]:
# Derive the piece count and the per-piece square lists for every row
piece_summaries = df.apply(pieces_and_positions, axis=1)
piece_counts, piece_positions = zip(*piece_summaries)
df["number_of_pieces"] = piece_counts
df["positions"] = piece_positions

In [10]:
# Discard the columns that are not kept in the exported data
df = df.drop(
    labels=["id", "fen", "final_fen", "rating deviation", "popularity", "nbplays", "halfmove_clock", "fullmove_clock"],
    axis="columns",
)

In [11]:
# Number the rows consecutively, starting at 1
df["id"] = np.arange(1, len(df) + 1)

In [None]:
# Write the processed puzzles to CSV without the DataFrame index column
df.to_csv("../data/puzzles_alg.csv", index=False)

In [None]:
# Write the processed puzzles to JSON using the row-wise "records" orientation
df.to_json("../data/puzzles_alg.json", orient="records")

# Testing

In [41]:
# Preview the first rows of the frame
df.head()

Unnamed: 0,fen,moves,rating,themes,gameurl,moves_alg,final_fen,fen_after_first_move,placement,active,castling,enpassant,number_of_pieces,positions,id
0,5rk1/1p3ppp/pq3b2/8/8/1P1Q1N2/P4PPP/3R2K1 w - ...,d3d6 f8d8 d6d8 f6d8,1488,advantage endgame short,https://lichess.org/F8M8OS71#53,Qd6 Rd8 Qxd8+ Bxd8,3b2k1/1p3ppp/pq6/8/8/1P3N2/P4PPP/3R2K1 w - - 0 29,5rk1/1p3ppp/pq1Q1b2/8/8/1P3N2/P4PPP/3R2K1 b - ...,5rk1/1p3ppp/pq1Q1b2/8/8/1P3N2/P4PPP/3R2K1,b,-,-,18,"{'P': ['a2', 'b3', 'f2', 'g2', 'h2'], 'R': ['d...",1
1,r2qr1k1/b1p2ppp/pp4n1/P1P1p3/4P1n1/B2P2Pb/3NBP...,b6c5 e2g4 h3g4 d1g4,1145,advantage middlegame short,https://lichess.org/4MWQCxQ6/black#32,bxc5 Bxg4 Bxg4 Qxg4,r2qr1k1/b1p2ppp/p5n1/P1p1p3/4P1Q1/B2P2P1/3N1P1...,r2qr1k1/b1p2ppp/p5n1/P1p1p3/4P1n1/B2P2Pb/3NBP1...,r2qr1k1/b1p2ppp/p5n1/P1p1p3/4P1n1/B2P2Pb/3NBP1...,w,-,-,29,"{'P': ['a5', 'd3', 'e4', 'f2', 'g3', 'h2'], 'R...",2
2,r4rk1/pp3ppp/2n1b3/q1pp2B1/8/P1Q2NP1/1PP1PP1P/...,g5e7 a5c3 b2c3 c6e7,1407,advantage master middlegame short,https://lichess.org/iihZGl6t#29,Be7 Qxc3 bxc3 Nxe7,r4rk1/pp2nppp/4b3/2pp4/8/P1P2NP1/2P1PP1P/2KR3R...,r4rk1/pp2Bppp/2n1b3/q1pp4/8/P1Q2NP1/1PP1PP1P/2...,r4rk1/pp2Bppp/2n1b3/q1pp4/8/P1Q2NP1/1PP1PP1P/2...,b,-,-,26,"{'P': ['a3', 'b2', 'c2', 'e2', 'f2', 'g3', 'h2...",3
3,3q1rk1/1pp3pp/5p1P/4pPP1/rb1pP3/3P1N2/b1P1B3/2...,d8a8 g5g6 h7g6 h6g7,2306,advancedPawn crushing kingsideAttack middlegam...,https://lichess.org/FLmpZbTm/black#52,Qa8 g6 hxg6 hxg7,q4rk1/1pp3P1/5pp1/4pP2/rb1pP3/3P1N2/b1P1B3/2QK...,q4rk1/1pp3pp/5p1P/4pPP1/rb1pP3/3P1N2/b1P1B3/2Q...,q4rk1/1pp3pp/5p1P/4pPP1/rb1pP3/3P1N2/b1P1B3/2Q...,w,-,-,25,"{'P': ['c2', 'd3', 'e4', 'f5', 'g5', 'h6'], 'R...",4
4,4r3/5pk1/1p3np1/3p3p/2qQ4/P4N1P/1P3RP1/7K w - ...,d4b6 f6e4 h1g1 e4f2,2038,crushing endgame short trappedPiece,https://lichess.org/GeXqsW90#67,Qxb6 Ne4 Kg1 Nxf2,4r3/5pk1/1Q4p1/3p3p/2q5/P4N1P/1P3nP1/6K1 w - -...,4r3/5pk1/1Q3np1/3p3p/2q5/P4N1P/1P3RP1/7K b - -...,4r3/5pk1/1Q3np1/3p3p/2q5/P4N1P/1P3RP1/7K,b,-,-,16,"{'P': ['a3', 'b2', 'g2', 'h3'], 'R': ['f2'], '...",5


In [17]:
# Placement field of the first puzzle, used for the manual checks below
placement = df["placement"].iloc[0]

In [20]:
# Build a board from the placement field alone (no side-to-move or castling fields are passed)
board = chess.Board(placement)

In [45]:
# Show the per-piece square lists computed for the first puzzle
df["positions"].iloc[0]

{'P': ['a2', 'b3', 'f2', 'g2', 'h2'],
 'R': ['d1'],
 'N': ['f3'],
 'Q': ['d6'],
 'K': ['g1'],
 'p': ['a6', 'b7', 'f7', 'g7', 'h7'],
 'r': ['f8'],
 'b': ['f6'],
 'q': ['b6'],
 'k': ['g8']}

In [12]:
# Preview the first rows of the frame
df.head()

Unnamed: 0,fen,moves,rating,themes,gameurl,moves_alg,final_fen,fen_after_first_move,placement,active,castling,enpassant,number_of_pieces,positions,id
0,5rk1/1p3ppp/pq3b2/8/8/1P1Q1N2/P4PPP/3R2K1 w - ...,d3d6 f8d8 d6d8 f6d8,1488,advantage endgame short,https://lichess.org/F8M8OS71#53,Qd6 Rd8 Qxd8+ Bxd8,3b2k1/1p3ppp/pq6/8/8/1P3N2/P4PPP/3R2K1 w - - 0 29,5rk1/1p3ppp/pq1Q1b2/8/8/1P3N2/P4PPP/3R2K1 b - ...,5rk1/1p3ppp/pq1Q1b2/8/8/1P3N2/P4PPP/3R2K1,b,-,-,18,"P a2, p a6, P b3, q b6, p b7, R d1, Q d6, P f2...",1
1,r2qr1k1/b1p2ppp/pp4n1/P1P1p3/4P1n1/B2P2Pb/3NBP...,b6c5 e2g4 h3g4 d1g4,1145,advantage middlegame short,https://lichess.org/4MWQCxQ6/black#32,bxc5 Bxg4 Bxg4 Qxg4,r2qr1k1/b1p2ppp/p5n1/P1p1p3/4P1Q1/B2P2P1/3N1P1...,r2qr1k1/b1p2ppp/p5n1/P1p1p3/4P1n1/B2P2Pb/3NBP1...,r2qr1k1/b1p2ppp/p5n1/P1p1p3/4P1n1/B2P2Pb/3NBP1...,w,-,-,29,"R a1, B a3, P a5, p a6, b a7, r a8, N b1, p c5...",2
2,r4rk1/pp3ppp/2n1b3/q1pp2B1/8/P1Q2NP1/1PP1PP1P/...,g5e7 a5c3 b2c3 c6e7,1407,advantage master middlegame short,https://lichess.org/iihZGl6t#29,Be7 Qxc3 bxc3 Nxe7,r4rk1/pp2nppp/4b3/2pp4/8/P1P2NP1/2P1PP1P/2KR3R...,r4rk1/pp2Bppp/2n1b3/q1pp4/8/P1Q2NP1/1PP1PP1P/2...,r4rk1/pp2Bppp/2n1b3/q1pp4/8/P1Q2NP1/1PP1PP1P/2...,b,-,-,26,"P a3, q a5, p a7, r a8, P b2, p b7, K c1, P c2...",3
3,3q1rk1/1pp3pp/5p1P/4pPP1/rb1pP3/3P1N2/b1P1B3/2...,d8a8 g5g6 h7g6 h6g7,2306,advancedPawn crushing kingsideAttack middlegam...,https://lichess.org/FLmpZbTm/black#52,Qa8 g6 hxg6 hxg7,q4rk1/1pp3P1/5pp1/4pP2/rb1pP3/3P1N2/b1P1B3/2QK...,q4rk1/1pp3pp/5p1P/4pPP1/rb1pP3/3P1N2/b1P1B3/2Q...,q4rk1/1pp3pp/5p1P/4pPP1/rb1pP3/3P1N2/b1P1B3/2Q...,w,-,-,25,"b a2, r a4, q a8, b b4, p b7, Q c1, P c2, p c7...",4
4,4r3/5pk1/1p3np1/3p3p/2qQ4/P4N1P/1P3RP1/7K w - ...,d4b6 f6e4 h1g1 e4f2,2038,crushing endgame short trappedPiece,https://lichess.org/GeXqsW90#67,Qxb6 Ne4 Kg1 Nxf2,4r3/5pk1/1Q4p1/3p3p/2q5/P4N1P/1P3nP1/6K1 w - -...,4r3/5pk1/1Q3np1/3p3p/2q5/P4N1P/1P3RP1/7K b - -...,4r3/5pk1/1Q3np1/3p3p/2q5/P4N1P/1P3RP1/7K,b,-,-,16,"P a3, P b2, Q b6, q c4, p d5, r e8, R f2, N f3...",5


In [16]:
# One empty square list per piece symbol (lower case = black, upper case = white)
positions = {symbol: [] for symbol in "pPrRbBnNqQkK"}

In [14]:
# All 64 squares, enumerated file by file (a1..a8, then b1..b8, ... up to h8)
all_squares = [
    chess.square(file_index, rank_index)
    for file_index in range(8)
    for rank_index in range(8)
]

In [26]:
from collections import defaultdict, OrderedDict

# Group the names of the occupied squares by piece symbol
positions_dict = defaultdict(list)

for square in all_squares:
    piece = board.piece_at(square)
    if not piece:
        continue
    positions_dict[str(piece)].append(chess.square_name(square))

P a2
p a6
P b3
q b6
p b7
R d1
Q d6
P f2
N f3
b f6
p f7
r f8
K g1
P g2
p g7
k g8
P h2
p h7


In [27]:
# Display the grouped squares as a plain dict (keys appear in insertion order)
dict(positions_dict)

{'P': ['a2', 'b3', 'f2', 'g2', 'h2'],
 'p': ['a6', 'b7', 'f7', 'g7', 'h7'],
 'q': ['b6'],
 'R': ['d1'],
 'Q': ['d6'],
 'N': ['f3'],
 'b': ['f6'],
 'r': ['f8'],
 'K': ['g1'],
 'k': ['g8']}

In [37]:
# Sort rank per piece symbol: white P, R, B, N, Q, K are 0-5, black p, r, b, n, q, k are 6-11
order = {symbol: rank for rank, symbol in enumerate("PRBNQKprbnqk")}

OrderedDict(
    (symbol, positions_dict[symbol])
    for symbol in sorted(positions_dict, key=order.__getitem__)
)

OrderedDict([('P', ['a2', 'b3', 'f2', 'g2', 'h2']),
             ('R', ['d1']),
             ('N', ['f3']),
             ('Q', ['d6']),
             ('K', ['g1']),
             ('p', ['a6', 'b7', 'f7', 'g7', 'h7']),
             ('r', ['f8']),
             ('b', ['f6']),
             ('q', ['b6']),
             ('k', ['g8'])])

In [36]:
# Sort rank of the black pawn, read from `order` (no `sort_pieces` helper is defined in this notebook)
order["p"]

6