# Extract Chess Openings for python-chess

The goal is to build a lookup table that gives, for each FEN position reached in a known opening, the best move to play from that position.
The version of the output to keep is "fen7" (`openings_fen7.csv`).

In [103]:
!pip install chess

import ast
import os

import chess
import chess.pgn
import pandas as pd



In [104]:
# Input: CSV of openings read by create_opening_library.
DATA_PATH = "/kaggle/input/all-chess-openings/openings.csv"
# Output: per-position best-move table written by create_opening_library.
OUTPUT_PATH = "/kaggle/working/openings_fen7.csv"

In [105]:

def _strip_move_number(move_san):
    """Remove a leading move-number prefix ("1.", "12...") from a SAN token.

    Tokens that start with a digit but contain no dot (e.g. "0-0" castling)
    are returned unchanged.
    """
    if move_san[:1].isdigit() and "." in move_san:
        return move_san.rsplit(".", 1)[-1].strip()
    return move_san


def create_opening_library(path="openings.csv", output_path="openings_fen.csv"):
    """Build a "position -> best move" CSV from a CSV of opening lines.

    The source file must provide two columns:
      * 'moves_list'   - string representation of a Python list of SAN moves,
                         optionally prefixed with move numbers (e.g. '1.e4').
      * 'Player Win %' - win percentage attached to the whole opening line.

    Every opening is replayed from the standard start position. For each move
    one row is recorded: the FEN *before* the move, the move in UCI notation,
    and the opening's win percentage. When several openings pass through the
    same FEN, only the row with the highest win percentage is kept.

    Args:
        path: Path of the source openings CSV.
        output_path: Path of the CSV to write (columns 'fen', 'best_move',
            'winning_percentage').

    Returns:
        None. If ``output_path`` already exists, a message is printed and
        nothing is read or written.
    """
    if os.path.exists(output_path):
        print(f"File {output_path} already exists. Please remove it or choose another file name.")
        return

    # Read the source CSV file
    openings_df = pd.read_csv(path)

    openings_fen_rows = []

    for moves_repr, win_pct in zip(openings_df['moves_list'], openings_df['Player Win %']):
        # literal_eval only accepts Python literals, so CSV content can never
        # execute code (unlike eval).
        try:
            moves = ast.literal_eval(moves_repr)
        except (ValueError, SyntaxError, TypeError) as exc:
            print(f"Skipping unparsable moves_list: {moves_repr!r}. Error: {exc}")
            continue

        board = chess.Board()
        for raw_san in moves:
            try:
                move = board.parse_san(_strip_move_number(raw_san))
            except (ValueError, TypeError) as exc:
                print(f"Error processing move: {raw_san} in moves: {moves}. Error: {exc}")
                # The board can no longer follow this line; parsing the
                # remaining moves against a stale position would produce
                # wrong rows, so abandon the rest of the opening.
                break

            # Record the position *before* the move together with the move.
            openings_fen_rows.append({
                'fen': board.fen(),
                'best_move': move.uci(),
                'winning_percentage': win_pct,
            })
            board.push(move)

    openings_fen_df = pd.DataFrame(openings_fen_rows, columns=['fen', 'best_move', 'winning_percentage'])

    # Keep, for each FEN, the row with the highest winning percentage.
    # A stable sort makes the choice between equal percentages deterministic
    # (the first opening in the source file wins).
    optimized_openings_fen_df = (
        openings_fen_df
        .sort_values('winning_percentage', ascending=False, kind='mergesort')
        .drop_duplicates('fen')
    )

    # Save the optimized DataFrame to CSV
    optimized_openings_fen_df.to_csv(output_path, index=False)
    print(f"Optimized output saved to {output_path}")

# Example usage
# Build the library; this returns early with a message if OUTPUT_PATH already exists.
create_opening_library(DATA_PATH, OUTPUT_PATH)

# Load the generated table; `op` is reused by the cells below.
op = pd.read_csv(OUTPUT_PATH)
# Count of each distinct (fen, best_move, winning_percentage) row.
print(op.value_counts())
# Summary statistics of the numeric column(s), shown as the cell output.
op.describe()

File /kaggle/working/openings_fen7.csv already exists. Please remove it or choose another file name.
fen                                                                    best_move  winning_percentage
1rbq1rk1/1p2ppbp/p2p1np1/n1pP4/2P5/1PN3P1/P1QNPPBP/R1B2RK1 b - - 0 11  b7b5       31.3                  1
rnbqkb1r/3ppppp/5n2/2pP4/1p2P3/2N5/PP3PPP/R1BQKBNR w KQkq - 0 7        c3b5       37.3                  1
rnbqkb1r/1p3ppp/p2ppn2/8/2BNP3/2N5/PPP2PPP/R1BQK2R w KQkq - 0 7        c4b3       39.0                  1
rnbqkb1r/1p3ppp/p2ppn2/8/3NP1P1/2N1B3/PPP2P1P/R2QKB1R b KQkq - 0 7     e6e5       47.2                  1
rnbqkb1r/1p3ppp/p2ppn2/8/3NP3/1BN5/PPP2PPP/R1BQK2R b KQkq - 1 7        b7b5       39.0                  1
                                                                                                       ..
r1bqkbnr/pppp2pp/2n5/1B2pp2/4P3/2N2N2/PPPP1PPP/R1BQK2R b KQkq - 1 4    f5e4       25.8                  1
r1bqkbnr/pppp2pp/2n5/1B2pp2/4P3/5N2/PPPP1PPP/RNBQK2R w K

Unnamed: 0,winning_percentage
count,2338.0
mean,39.531437
std,10.63409
min,7.5
25%,32.5
50%,39.9
75%,46.1
max,77.6


In [106]:
# Reload the generated table from disk and preview its first 100 rows.
op = pd.read_csv(OUTPUT_PATH)
op.head(100)

Unnamed: 0,fen,best_move,winning_percentage
0,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,e2e4,77.6
1,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,g1f3,77.6
2,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...,b8c6,77.6
3,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...,f1c4,77.6
4,r1bqkbnr/pppp1ppp/2n5/4p3/2B1P3/5N2/PPPP1PPP/R...,g8f6,77.6
...,...,...,...
95,r1bqkb1r/2pp1ppp/p1n2n2/1p2p3/B3P3/5N2/PPPP1PP...,a4b3,57.6
96,r1bqkbnr/1ppp1ppp/p1n5/1B2p3/4P3/5N2/PPPP1PPP/...,b5a4,57.6
97,r1bqkbnr/1ppp1ppp/p1n5/4p3/B3P3/5N2/PPPP1PPP/R...,g8f6,57.6
98,r1bqkb1r/1ppp1ppp/p1n2n2/4p3/B3P3/5N2/PPPP1PPP...,e1g1,57.6


In [107]:
# Summary statistics of the numeric column(s) of the openings table.
op.describe()

Unnamed: 0,winning_percentage
count,2338.0
mean,39.531437
std,10.63409
min,7.5
25%,32.5
50%,39.9
75%,46.1
max,77.6


In [108]:
# Count of each distinct (fen, best_move, winning_percentage) row.
op.value_counts()

fen                                                                    best_move  winning_percentage
1rbq1rk1/1p2ppbp/p2p1np1/n1pP4/2P5/1PN3P1/P1QNPPBP/R1B2RK1 b - - 0 11  b7b5       31.3                  1
rnbqkb1r/3ppppp/5n2/2pP4/1p2P3/2N5/PP3PPP/R1BQKBNR w KQkq - 0 7        c3b5       37.3                  1
rnbqkb1r/1p3ppp/p2ppn2/8/2BNP3/2N5/PPP2PPP/R1BQK2R w KQkq - 0 7        c4b3       39.0                  1
rnbqkb1r/1p3ppp/p2ppn2/8/3NP1P1/2N1B3/PPP2P1P/R2QKB1R b KQkq - 0 7     e6e5       47.2                  1
rnbqkb1r/1p3ppp/p2ppn2/8/3NP3/1BN5/PPP2PPP/R1BQK2R b KQkq - 1 7        b7b5       39.0                  1
                                                                                                       ..
r1bqkbnr/pppp2pp/2n5/1B2pp2/4P3/2N2N2/PPPP1PPP/R1BQK2R b KQkq - 1 4    f5e4       25.8                  1
r1bqkbnr/pppp2pp/2n5/1B2pp2/4P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 0 4      b1c3       46.3                  1
r1bqkbnr/ppppp1pp/2n5/5p2/4P3/5N2/PPPP1PPP/RNBQKB1R

In [109]:
import chess
import pandas as pd

def play_auto_chess(openings_df):
    """Replay the opening book against itself from the start position.

    At every step the current position is looked up in ``openings_df`` and
    the matching book move with the highest win percentage is played. The
    board and the chosen move are printed at each step. The loop stops when
    the game is over, when the position is not in the book, or when the book
    move is malformed or not legal in the current position.

    Args:
        openings_df: DataFrame with columns 'fen', 'best_move' (UCI string)
            and 'winning_percentage'.

    Returns:
        None. Progress and the final ``board.result()`` are printed.
    """
    board = chess.Board()
    while not board.is_game_over():
        print(board)
        # Lookup key: the first four FEN fields (piece placement, side to
        # move, castling rights, en passant square). The halfmove and
        # fullmove counters are dropped so transpositions still match.
        current_fen = ' '.join(board.fen().split(' ')[:4])

        # na=False keeps missing FEN cells from breaking the boolean mask.
        matches = openings_df['fen'].str.startswith(current_fen, na=False)
        possible_moves = openings_df[matches]

        if possible_moves.empty:
            print("No more known openings, ending game.")
            break

        # Pick the highest win percentage explicitly instead of relying on
        # the row order of the input; the stable sort keeps the first row
        # among ties.
        best_move_row = possible_moves.sort_values(
            'winning_percentage', ascending=False, kind='mergesort'
        ).iloc[0]
        best_move = best_move_row['best_move']
        win_percentage = best_move_row['winning_percentage']
        print(f"Best move: {best_move} with win%: {win_percentage}")

        try:
            move = chess.Move.from_uci(str(best_move))
        except ValueError as exc:
            print(f"Invalid book move {best_move!r}: {exc}. Ending game.")
            break

        # board.push() does not check legality, so validate first to avoid
        # silently corrupting the board state.
        if move not in board.legal_moves:
            print(f"Book move {best_move} is not legal in this position, ending game.")
            break

        # Make the move on the board
        board.push(move)
    print("Game over. Result:", board.result())

# Example usage, assuming 'op' is your DataFrame loaded with your new openings data
# (columns 'fen', 'best_move' and 'winning_percentage' are read by play_auto_chess).
play_auto_chess(op)


r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R
Best move: e2e4 with win%: 77.6
r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . P . . .
. . . . . . . .
P P P P . P P P
R N B Q K B N R
Best move: e7e5 with win%: 77.6
r n b q k b n r
p p p p . p p p
. . . . . . . .
. . . . p . . .
. . . . P . . .
. . . . . . . .
P P P P . P P P
R N B Q K B N R
Best move: g1f3 with win%: 77.6
r n b q k b n r
p p p p . p p p
. . . . . . . .
. . . . p . . .
. . . . P . . .
. . . . . N . .
P P P P . P P P
R N B Q K B . R
Best move: b8c6 with win%: 77.6
r . b q k b n r
p p p p . p p p
. . n . . . . .
. . . . p . . .
. . . . P . . .
. . . . . N . .
P P P P . P P P
R N B Q K B . R
Best move: f1c4 with win%: 77.6
r . b q k b n r
p p p p . p p p
. . n . . . . .
. . . . p . . .
. . B . P . . .
. . . . . N . .
P P P P . P P P
R N B Q K . . R
Best move: g8f6 with win%: 77.6
r . b q k b . r
p p p p . p p p
. . n . 