# Hikaru Game Analysis

Dependencies and functions for transforming the PGN data into pandas DataFrames.

In [None]:
import chess.pgn as ch
import pandas as pd
import io
import re

def extract_game_data(game, game_id):
    """Extract the headers and the mainline moves (with clock times) of one game.

    Args:
        game: A parsed PGN game (e.g. the object returned by
            ``chess.pgn.read_game``). Only ``headers``, ``variations``,
            ``variation(0)``, ``board().turn``, ``san()`` and ``comment``
            are accessed.
        game_id: Identifier stored in the ``Game_ID`` index of the metadata row.

    Returns:
        A ``(metadata_df, moves_df)`` tuple:

        * ``metadata_df`` - one row holding the game headers, indexed by
          ``Game_ID``.
        * ``moves_df`` - one row per full move, indexed by ``move_number``,
          with columns ``white_move``, ``white_time``, ``black_move`` and
          ``black_time``. Times are the raw strings captured from
          ``[%clk ...]`` comments, or ``None`` when a move has no such
          annotation. When the game ends on White's move the final row has
          ``None`` for both Black columns. A game without moves yields an
          empty frame with the same columns.
    """
    # --- Metadata ---
    metadata_df = pd.DataFrame([dict(game.headers)])
    metadata_df["Game_ID"] = game_id
    metadata_df.set_index("Game_ID", inplace=True)

    # --- Move extraction ---
    move_columns = ["white_move", "white_time", "black_move", "black_time"]
    clk_pattern = re.compile(r"\[%clk ([0-9:.\-]+)\]")

    move_data = []
    pending = None  # Row of the full move currently being assembled.
    move_num = 1
    node = game

    # Follow only the main line: variation(0) of every node.
    while node.variations:
        next_node = node.variation(0)
        move_san = next_node.san()

        # Guard against a missing comment so the regex never receives None.
        clk_match = clk_pattern.search(next_node.comment or "")
        clk_time = clk_match.group(1) if clk_match else None

        if pending is None:
            pending = dict.fromkeys(move_columns)
            pending["move_number"] = move_num

        # The side to move *before* the move is the side that played it.
        if node.board().turn:  # White to move
            pending["white_move"] = move_san
            pending["white_time"] = clk_time
        else:
            pending["black_move"] = move_san
            pending["black_time"] = clk_time
            # Black's reply completes the full move. Starting a fresh row
            # keeps White's data from leaking into the next row.
            move_data.append(pending)
            pending = None
            move_num += 1

        node = next_node

    # A game that ends on White's move leaves a half-filled row behind;
    # keep it so the last move is not silently dropped.
    if pending is not None:
        move_data.append(pending)

    # --- Moves DataFrame ---
    if move_data:
        moves_df = pd.DataFrame(move_data, columns=["move_number"] + move_columns)
        moves_df.set_index("move_number", inplace=True)
    else:
        # Empty DataFrame if no moves
        moves_df = pd.DataFrame(columns=move_columns)
        moves_df.index.name = "move_number"

    return metadata_df, moves_df


def extract_all_games(pgn_path, encoding="utf-8"):
    """Read a PGN file and extract metadata and move data for every game.

    Args:
        pgn_path: Path of the PGN file to read.
        encoding: Text encoding used to open the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A ``(metadata_df, moves_dict)`` tuple:

        * ``metadata_df`` - the per-game metadata rows returned by
          ``extract_game_data``, concatenated; an empty DataFrame when no
          game was kept.
        * ``moves_dict`` - maps each kept game's ID to its moves DataFrame.

        Games whose moves DataFrame is empty are skipped. IDs start at 1 and
        advance only for kept games, so they stay consecutive.
    """
    all_metadata = []
    moves_dict = {}
    game_id = 1

    # An explicit encoding keeps parsing reproducible across platforms.
    with open(pgn_path, encoding=encoding) as pgn:
        while True:
            game = ch.read_game(pgn)
            if game is None:
                break  # End of file

            metadata_df, moves_df = extract_game_data(game, game_id)

            # Skip games without any extracted moves; their ID is reused.
            if moves_df.empty:
                continue

            all_metadata.append(metadata_df)
            moves_dict[game_id] = moves_df
            game_id += 1

    # Combine all metadata DataFrames; pd.concat rejects an empty list.
    if not all_metadata:
        return pd.DataFrame(), moves_dict

    return pd.concat(all_metadata), moves_dict

Data extraction from GM Hikaru's chess.com game history

In [None]:
# Path to your PGN file
pgn_path = "GMHikaruOnTwitch.pgn"

# Extract all games: one metadata row per game, plus a dict that maps each
# game ID to that game's moves DataFrame.
metadata_df, moves_dict = extract_all_games(pgn_path)

# --- Preview ---
print("=== METADATA (first 5 games) ===")
print(metadata_df.head())

print("\n=== MOVES for Game 1 (first 10 moves) ===")
# Game IDs start at 1, but the dict is empty when no game could be extracted;
# report that instead of failing with a bare KeyError.
if 1 in moves_dict:
    print(moves_dict[1].head(10))
else:
    print("No games with moves were found in", pgn_path)

KeyError: "None of ['move_number'] are in the columns"