In [1]:
import chess
import pandas as pd
import json

def load_game_data(file_name):
    file_path = f"../../../data/processed/{file_name}.json"
    with open(file_path, 'r') as file:
        games = json.load(file)
    return games

def classify_positions(game, middle_index):
    board = chess.Board()
    positions = []

    for move_number, san in enumerate(game['moves'].split(), start=1):
        move = board.parse_san(san)
        board.push(move)
        fen = board.fen()
        
        if move_number <= middle_index:
            game_state = "opening"
        else:
            game_state = "middlegame"
        
        positions.append({'position': fen, 'game_state': game_state})
    
    return positions

def analyze_games(games):
    all_positions = []

    for game in games:
        middle_index = game['division'].get('middle', 25)
        positions = classify_positions(game, middle_index)
        all_positions.extend(positions)
    
    return pd.DataFrame(all_positions)



In [13]:
import numpy as np
import chess

def fen_to_input(fen):
    board = chess.Board(fen)
    input_matrix = np.zeros((8, 8, 12), dtype=int)

    piece_to_plane = {
        'P': 0, 'N': 1, 'B': 2, 'R': 3, 'Q': 4, 'K': 5,
        'p': 6, 'n': 7, 'b': 8, 'r': 9, 'q': 10, 'k': 11
    }

    for rank in range(8):
        for file in range(8):
            piece = board.piece_at(chess.square(file, 7 - rank))
            if piece:
                plane = piece_to_plane[piece.symbol()]
                input_matrix[rank, file, plane] = 1

    additional_planes = np.zeros((8, 8, 5), dtype=int)
    
    if board.has_kingside_castling_rights(chess.WHITE):
        additional_planes[:, :, 0] = 1
    if board.has_queenside_castling_rights(chess.WHITE):
        additional_planes[:, :, 1] = 1
    if board.has_kingside_castling_rights(chess.BLACK):
        additional_planes[:, :, 2] = 1
    if board.has_queenside_castling_rights(chess.BLACK):
        additional_planes[:, :, 3] = 1

    if board.ep_square:
        ep_rank = chess.square_rank(board.ep_square)
        ep_file = chess.square_file(board.ep_square)
        additional_planes[7 - ep_rank, ep_file, 4] = 1

    side_to_move_plane = np.ones((8, 8, 1), dtype=int) if board.turn == chess.WHITE else np.zeros((8, 8, 1), dtype=int)
    
    input_tensor = np.concatenate((input_matrix, additional_planes, side_to_move_plane), axis=2)
    
    return input_tensor

# Example usage
fen = "r1bqkbnr/pppp2pp/2n2p2/1B2p3/4P3/2N2N2/PPPP1PPP/R1BQK2R b KQkq - 1 4"
input_tensor = fen_to_input(fen)
print(input_tensor[1]) 
print()
print(input_tensor[2]) 
print()
print(input_tensor[7]) 


[[0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0]]

[[0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]]

[[0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0]]


In [17]:

# Load games
games = load_game_data('apendra_games_processed')

# Analyze games and get positions with classifications
results = analyze_games(games)

print(results)
# Save results to JSON (optional)
#results.to_json("positions_classified.json", orient='records', lines=True)
# Remove duplicate positions
results = results.drop_duplicates(subset='position')

# Convert FEN to input tensor and add to DataFrame
results['input'] = results['position'].apply(lambda fen: fen_to_input(fen).flatten())

# Save results to JSON (optional# Save results to CSV
results.to_csv("opening_classified.csv", index=False)



                                                position  game_state
0      rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...     opening
1      rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...     opening
2      rnbqkbnr/pppp1ppp/8/4p3/4P3/2N5/PPPP1PPP/R1BQK...     opening
3      r1bqkbnr/pppp1ppp/2n5/4p3/4P3/2N5/PPPP1PPP/R1B...     opening
4      r1bqkbnr/pppp1ppp/2n5/4p3/2B1P3/2N5/PPPP1PPP/R...     opening
...                                                  ...         ...
53943  r3r1k1/ppp2ppp/5q2/4b2Q/4P3/2N4P/PPPB1PP1/R4RK...  middlegame
53944  r3r1k1/ppp2ppp/5q2/4b2Q/4P3/2N4P/PPPB1PP1/1R3R...  middlegame
53945  3rr1k1/ppp2ppp/5q2/4b2Q/4P3/2N4P/PPPB1PP1/1R3R...  middlegame
53946  3rr1k1/ppp2ppp/5q2/3Nb2Q/4P3/7P/PPPB1PP1/1R3RK...  middlegame
53947  3rr1k1/ppp2ppp/3q4/3Nb2Q/4P3/7P/PPPB1PP1/1R3RK...  middlegame

[53948 rows x 2 columns]
                                                position  game_state  \
0      rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...     opening   
1 

In [25]:

results.to_csv("opening_classified.csv", index=False)
# Display the DataFrame
print(results.head)

<bound method NDFrame.head of                                                 position  game_state  \
0      rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...     opening   
1      rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...     opening   
2      rnbqkbnr/pppp1ppp/8/4p3/4P3/2N5/PPPP1PPP/R1BQK...     opening   
3      r1bqkbnr/pppp1ppp/2n5/4p3/4P3/2N5/PPPP1PPP/R1B...     opening   
4      r1bqkbnr/pppp1ppp/2n5/4p3/2B1P3/2N5/PPPP1PPP/R...     opening   
...                                                  ...         ...   
53943  r3r1k1/ppp2ppp/5q2/4b2Q/4P3/2N4P/PPPB1PP1/R4RK...  middlegame   
53944  r3r1k1/ppp2ppp/5q2/4b2Q/4P3/2N4P/PPPB1PP1/1R3R...  middlegame   
53945  3rr1k1/ppp2ppp/5q2/4b2Q/4P3/2N4P/PPPB1PP1/1R3R...  middlegame   
53946  3rr1k1/ppp2ppp/5q2/3Nb2Q/4P3/7P/PPPB1PP1/1R3RK...  middlegame   
53947  3rr1k1/ppp2ppp/3q4/3Nb2Q/4P3/7P/PPPB1PP1/1R3RK...  middlegame   

                                                   input  
0      [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,