In [None]:
import os
import requests
import json

# HTTP headers passed to every requests.get() call in this cell.
# The User-Agent string identifies this client and includes a contact e-mail.
headers = {
    'User-Agent': 'ChessDataFetcher/1.0 (ardilyuce@gmail.com)'  # Replace with your email
}

def fetch_game_archives(username):
    """
    Fetch the game archive URLs for a Chess.com player.

    Args:
        username: Chess.com username whose archive list is requested.

    Returns:
        list: The value of the ``archives`` key in the JSON response, or an
        empty list when the request fails, times out, answers with a
        non-200 status, or the body is not valid JSON.
    """
    url = f'https://api.chess.com/pub/player/{username}/games/archives'
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching archives: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching archives: HTTP {response.status_code}, {response.text}")
        return []

    try:
        archives = response.json().get('archives', [])
    except ValueError as exc:
        # response.json() raises a ValueError subclass on a malformed body.
        print(f"Error fetching archives: invalid JSON response ({exc})")
        return []

    print(f"Archives fetched: {len(archives)} archives found.")
    return archives

def fetch_games_from_archive(archive_url):
    """
    Fetch all games listed under a single archive URL.

    Args:
        archive_url: Archive URL as returned by ``fetch_game_archives``.

    Returns:
        list: The value of the ``games`` key in the JSON response, or an
        empty list when the request fails, times out, answers with a
        non-200 status, or the body is not valid JSON.
    """
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(archive_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching games from {archive_url}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching games from {archive_url}: HTTP {response.status_code}, {response.text}")
        return []

    try:
        games = response.json().get('games', [])
    except ValueError as exc:
        # response.json() raises a ValueError subclass on a malformed body.
        print(f"Error fetching games from {archive_url}: invalid JSON response ({exc})")
        return []

    print(f"Games fetched from {archive_url}: {len(games)} games.")
    return games

def fetch_all_games(username):
    """
    Collect every game of a player by walking all of their archives.

    Returns a single flat list with the games of each archive appended in
    archive order, or an empty list when no archive could be fetched.
    """
    archive_urls = fetch_game_archives(username)
    if not archive_urls:
        print("No archives fetched. Ensure the username is correct or the Chess.com API is accessible.")
        return []

    collected = []
    for url in archive_urls:
        print(f"Fetching games from archive: {url}")
        collected += fetch_games_from_archive(url)

    print(f"Total games fetched: {len(collected)} games.")
    return collected

def save_to_json(data, filename):
    """
    Write data to a JSON file indented by four spaces.

    Falsy data (e.g. an empty list) is never written, so an existing file
    is left untouched in that case.
    """
    print(f"Saving data to {filename}...")
    if data:
        with open(filename, "w") as handle:
            json.dump(data, handle, indent=4)
        # Report the on-disk size as a quick sanity check for the user.
        print(f"Data saved successfully to {filename}. File size: {os.path.getsize(filename)} bytes")
    else:
        print("No data to save. The file will not be updated.")

# Script entry point: download all games for one user and store them as raw JSON.
if __name__ == "__main__":
    username = "ardil30"  # Chess.com username
    # Output location; note the username is hard-coded into the file name here.
    file_path = "./data/raw/ardil30_raw_games.json"

    # Ensure the raw data directory exists
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Fetch new games (returns an empty list when nothing could be fetched)
    all_games = fetch_all_games(username)

    # Save fetched data (save_to_json skips writing when the list is empty)
    save_to_json(all_games, file_path)


In [None]:
import os
import json
import pandas as pd
from datetime import datetime

def load_openings(file_path):
    """
    Read the list of main openings, one per line, from a text file.

    A missing file is created empty first, so the call then returns an
    empty list. Lines are stripped and blank lines are skipped.
    """
    if not os.path.exists(file_path):
        # Create an empty file so the read below always succeeds.
        with open(file_path, "w"):
            pass

    names = []
    with open(file_path, "r") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                names.append(cleaned)
    return names

def add_opening_to_file(file_path, opening):
    """
    Append an opening to the openings file unless it is already listed.

    The file is re-read through load_openings on every call to perform
    the duplicate check.
    """
    if opening in load_openings(file_path):
        return

    with open(file_path, "a") as handle:
        handle.write(opening + "\n")
    print(f"New opening added to {file_path}: {opening}")

def split_pgn(pgn):
    """
    Split a PGN string into its tag-pair headers and its move text.

    Args:
        pgn: Raw PGN text. May be empty or None.

    Returns:
        dict: ``{"Information": <dict of tag name -> value>, "Moves": <str>}``.
        "Moves" is "No Moves available" when the PGN is empty or contains
        no move text.
    """
    if not pgn:
        return {"Information": {}, "Moves": "No Moves available"}

    # Normalise CRLF / CR line endings; otherwise the blank-line separator
    # is never found and no header line ends with "]".
    text = pgn.replace("\r\n", "\n").replace("\r", "\n").strip()

    # Split only at the FIRST blank line so move text that itself contains
    # blank lines is kept in full.
    header_block, _, move_block = text.partition("\n\n")
    header_lines = [line.strip() for line in header_block.split("\n")]

    if not any(line.startswith("[") for line in header_lines):
        # No tag pairs at all: the whole text is move text, not a header block.
        header_lines, move_block = [], text

    metadata = {}
    for line in header_lines:
        if line.startswith("[") and line.endswith("]"):
            # Drop the brackets, then split the tag name from its value
            # at the first space.
            key, separator, value = line[1:-1].partition(" ")
            if separator:
                metadata[key] = value.strip().strip('"')

    moves = move_block.strip() or "No Moves available"
    return {"Information": metadata, "Moves": moves}

def determine_game_result(game, username):
    """
    Determine the result of a game from the given player's point of view.

    Args:
        game: Game dict with "white" and "black" sub-dicts, each holding
            "username" and "result".
        username: Player whose result is wanted (compared case-insensitively).

    Returns:
        str: "Win", "Loss" or "Draw"; "Unknown" when the player is on
        neither side or the result code is not recognised.
    """
    # Result codes that mean the player lost. "lose" and the variant codes
    # were previously missing and fell through to "Unknown".
    loss_codes = {
        "checkmated", "timeout", "resigned", "abandoned", "lose",
        "kingofthehill", "threecheck", "bughousepartnerlose",
    }
    draw_codes = {
        "stalemate", "draw", "insufficient material", "insufficient",
        "repetition", "agreed", "50move", "timevsinsufficient",
    }

    target = username.lower()
    for color in ("white", "black"):
        side = game.get(color) or {}
        # `or ""` tolerates keys that are present but hold None.
        player = (side.get("username") or "").strip().lower()
        if player != target:
            continue

        result = (side.get("result") or "").lower()
        if result == "win":
            return "Win"
        if result in loss_codes:
            return "Loss"
        if result in draw_codes:
            return "Draw"
        # Player matched but the code is unrecognised: do not try the other side.
        return "Unknown"

    return "Unknown"

def process_game_data(all_games, username, openings_file):
    """
    Turn raw game dicts into flat, readable records with opening information.

    Only games whose "rules" value is "chess" are processed. Other games are
    skipped up front, so they no longer add entries to the openings file
    even though they never appear in the output.

    Args:
        all_games: Iterable of raw game dicts.
        username: Player used to decide Win/Loss/Draw for each game.
        openings_file: Path of the text file listing main openings. Opening
            names not matched by any listed main opening are appended to it.

    Returns:
        list[dict]: One record per standard chess game, in input order.
    """
    # Local import: only `datetime` itself is imported at file level.
    from datetime import timezone

    def unix_to_readable(unix_time):
        # Timezone-aware replacement for the deprecated utcfromtimestamp();
        # the formatted UTC string is identical.
        return datetime.fromtimestamp(unix_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    # Load main openings from the text file (read once, not refreshed in the loop)
    main_openings = load_openings(openings_file)

    game_list = []
    for game in all_games:
        if game.get("rules") != "chess":
            continue

        pgn_split = split_pgn(game.get("pgn", ""))

        # Last path segment of the "eco" value, dashes turned into spaces.
        # `or "Unknown"` also covers an "eco" key that holds None.
        opening_name = (game.get("eco") or "Unknown").split("/")[-1].replace("-", " ").capitalize()

        game_result = determine_game_result(game, username)

        # First listed main opening whose name occurs inside the opening name.
        lowered_name = opening_name.lower()
        matched_main = next(
            (main for main in main_openings if main.lower() in lowered_name),
            None,
        )

        if matched_main is not None:
            main_opening = matched_main
            variation = opening_name.strip().capitalize()
        elif opening_name != "Unknown":
            # Unseen opening: register it as a new main opening.
            add_opening_to_file(openings_file, opening_name)
            main_opening = opening_name
            variation = "Unknown"
        else:
            main_opening = "Unknown"
            variation = opening_name

        white = game.get("white", {})
        black = game.get("black", {})
        game_list.append({
            "Game URL": game.get("url", ""),
            "Time Class": game.get("time_class", "N/A").capitalize(),
            "End Time": unix_to_readable(game.get("end_time", 0)),
            "White Player": white.get("username", "Unknown"),
            "White Rating": white.get("rating", "N/A"),
            "Black Player": black.get("username", "Unknown"),
            "Black Rating": black.get("rating", "N/A"),
            "Result": game_result,  # "Win", "Loss", "Draw" or "Unknown"
            "Main Opening": main_opening,
            "Variation": variation,
            "Information": pgn_split["Information"],  # PGN tag pairs
            "Moves": pgn_split["Moves"],  # PGN move text
        })
    return game_list

def save_to_csv(data, filename):
    """
    Write records to a CSV file via a pandas DataFrame, without the index column.
    """
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"Data saved to {filename} (CSV format)")

def save_to_json(data, filename):
    """
    Serialise data to a JSON file indented by four spaces.

    The file is always (over)written, even when data is empty.
    """
    with open(filename, "w") as handle:
        json.dump(data, handle, indent=4)
    print(f"Data saved to {filename} (JSON format)")

# Script entry point: read the raw games JSON, process it, and write the
# processed records as both JSON and CSV.
if __name__ == "__main__":
    username = "ardil30"  # Replace with your Chess.com username
    # All input/output paths live under ./data and are derived from the username,
    # except the openings list, which has a fixed name.
    raw_file_path = f"./data/raw/{username}_raw_games.json"
    processed_json_path = f"./data/processed/{username}_games.json"
    processed_csv_path = f"./data/processed/{username}_games.csv"
    openings_file_path = "./data/processed/chess_openings.txt"

    # Ensure the processed data directory exists
    os.makedirs("./data/processed", exist_ok=True)

    # Load raw data (raises if the raw file has not been created yet)
    with open(raw_file_path, "r") as file:
        raw_data = json.load(file)

    # Process data (may append new opening names to openings_file_path)
    processed_data = process_game_data(raw_data, username, openings_file_path)

    # Save processed data
    save_to_json(processed_data, processed_json_path)
    save_to_csv(processed_data, processed_csv_path)


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def load_processed_data(file_path):
    """
    Read the processed games JSON file into a pandas DataFrame.
    """
    games = pd.read_json(file_path)
    return games

def analyze_openings(data):
    """
    Compute games played and win rate for each main opening.

    Args:
        data: DataFrame with at least the columns 'Main Opening' and 'Result'.

    Returns:
        DataFrame indexed by 'Main Opening' with the columns
        'Games Played' (integer count) and 'Win Rate' (percentage, 0-100),
        restricted to openings with at least 10 games and sorted by
        'Games Played' in descending order. Empty input yields an empty frame.
    """
    # Vectorised aggregation instead of groupby.apply with a per-group
    # pd.Series: counts stay integers and empty input keeps both columns.
    is_win = data['Result'].eq('Win')
    by_opening = is_win.groupby(data['Main Opening'])
    opening_stats = pd.DataFrame({
        'Games Played': by_opening.size(),
        'Win Rate': by_opening.mean() * 100,
    })

    # Filter openings with at least 10 games
    opening_stats = opening_stats[opening_stats['Games Played'] >= 10]

    # Sort by the number of games played
    return opening_stats.sort_values(by='Games Played', ascending=False)

def plot_opening_stats(opening_stats, save_path):
    """
    Draw a bar chart of win rates for the first ten rows of opening_stats
    and save it as a PNG at save_path.

    Each bar is labelled with the number of games behind it. The directory
    part of save_path is created when missing.
    """
    leaders = opening_stats.head(10)  # first ten rows of the given ordering

    plt.figure(figsize=(12, 6))
    rects = plt.bar(leaders.index, leaders['Win Rate'], color='blue')

    # Write the game count slightly above the top of every bar.
    for rect, count in zip(rects, leaders['Games Played']):
        label_x = rect.get_x() + rect.get_width() / 2
        label_y = rect.get_height() + 2
        plt.text(label_x, label_y, f"{int(count)} games", ha='center', fontsize=10)

    # Titles, axis labels and limits
    plt.title("Top 10 Main Openings by Win Rate", fontsize=16)
    plt.xlabel("Main Opening", fontsize=12)
    plt.ylabel("Win Rate (%)", fontsize=12)
    plt.ylim(0, 100)  # percentages
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Persist the figure, then release it.
    target_dir = os.path.dirname(save_path)
    os.makedirs(target_dir, exist_ok=True)
    plt.savefig(save_path, format="png")
    print(f"Plot saved to {save_path}")
    plt.close()

# Script entry point: load the processed games, compute per-opening
# statistics, print them and save a bar chart.
if __name__ == "__main__":
    # File path for processed data
    processed_data_path = "./data/processed/ardil30_games.json"
    username = "ardil30"  # Replace with your Chess.com username (not referenced again in this block)

    # Load processed data
    print("Loading processed game data...")
    data = load_processed_data(processed_data_path)

    # Analyze main openings
    print("Analyzing main openings...")
    opening_stats = analyze_openings(data)
    print("Main Opening Statistics (Top 10):")
    print(opening_stats.head(10))

    # Save path for the plot
    opening_plot_path = "./reports/figures/ardil30_main_opening_stats.png"

    # Plot and save main opening statistics
    print("Plotting main opening statistics...")
    plot_opening_stats(opening_stats, opening_plot_path)
    print("Analysis complete!")


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def load_processed_data(file_path):
    """
    Return the processed games stored in a JSON file as a pandas DataFrame.
    """
    frame = pd.read_json(file_path)
    return frame

def calculate_rating_differential(row, username):
    """
    Return opponent rating minus the player's rating for one game row.

    The player is taken to be White when the row's "White Player" equals
    username (case-insensitively); otherwise the player is taken to be Black.
    """
    plays_white = row["White Player"].lower() == username.lower()
    if plays_white:
        own_rating = row["White Rating"]
        rival_rating = row["Black Rating"]
    else:
        own_rating = row["Black Rating"]
        rival_rating = row["White Rating"]
    return rival_rating - own_rating

def group_by_rating_differential(data, username):
    """
    Bucket games by rating differential (10-point bins) and compute win rates.

    The differential is opponent rating minus the player's rating. The
    player is taken to be White when "White Player" equals username
    (case-insensitively), otherwise Black.

    Side effect: the columns "Rating Differential" and
    "Rating Differential Bin" are added to ``data`` in place.

    Args:
        data: DataFrame with the columns "White Player", "White Rating",
            "Black Rating" and "Result".
        username: Player whose perspective is used.

    Returns:
        DataFrame indexed by bin label with one row for EVERY bin:
        "Games Played" is an integer count (0 for empty bins) and
        "Win Rate" a percentage (NaN for empty bins).
    """
    # Vectorised version of the per-row calculation; unlike a row-wise
    # DataFrame.apply it also works when ``data`` has no rows.
    is_white = data["White Player"].str.lower() == username.lower()
    player_rating = data["White Rating"].where(is_white, data["Black Rating"])
    opponent_rating = data["Black Rating"].where(is_white, data["White Rating"])
    data["Rating Differential"] = opponent_rating - player_rating

    # Bin edges: (-inf, -70), [-70, -60), ..., [60, 70), [70, inf)
    bins = [-float("inf")] + list(range(-70, 71, 10)) + [float("inf")]
    labels = [f"{low} to {high}" for low, high in zip(bins, bins[1:])]

    # right=False makes every bin closed on the left, open on the right.
    data["Rating Differential Bin"] = pd.cut(
        data["Rating Differential"], bins=bins, labels=labels, right=False
    )

    # observed=False is passed explicitly so empty bins are always kept,
    # with a count of 0 that downstream code can safely convert with int().
    is_win = data["Result"].eq("Win")
    by_bin = is_win.groupby(data["Rating Differential Bin"], observed=False)
    differential_stats = pd.DataFrame({
        "Games Played": by_bin.size(),
        "Win Rate": by_bin.mean() * 100,
    })

    return differential_stats

def plot_stats(stats, title, xlabel, ylabel, save_path):
    """
    Draw a bar chart of the "Win Rate" column of stats and save it as a PNG.

    Bars are labelled with the matching "Games Played" value, and a fixed
    footnote explaining the rating differential is added to the figure.
    The directory part of save_path is created when missing.
    """
    plt.figure(figsize=(12, 6))
    rects = plt.bar(stats.index.astype(str), stats["Win Rate"], color="blue")

    # Write the game count slightly above the top of every bar.
    for rect, count in zip(rects, stats["Games Played"]):
        label_x = rect.get_x() + rect.get_width() / 2
        label_y = rect.get_height() + 2
        plt.text(label_x, label_y, f"{int(count)} games", ha="center", fontsize=10)

    # Footnote in the lower-right corner of the figure.
    footnote = "Rating Differential = Opponent Rating - My Rating"
    plt.figtext(0.99, 0.01, footnote, horizontalalignment="right", fontsize=10, color="gray")

    # Titles, axis labels and limits
    plt.title(title, fontsize=16)
    plt.xlabel(xlabel, fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    plt.ylim(0, 100)  # percentages
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()

    # Persist the figure, then release it.
    target_dir = os.path.dirname(save_path)
    os.makedirs(target_dir, exist_ok=True)
    plt.savefig(save_path, format="png")
    print(f"Plot saved to {save_path}")
    plt.close()

# Script entry point: load the processed games, compute win rates per
# rating-differential bin, print them and save a bar chart.
if __name__ == "__main__":
    # File path for processed data
    processed_data_path = "./data/processed/ardil30_games.json"
    username = "ardil30"  # Replace with your Chess.com username

    # Load processed data
    print("Loading processed game data...")
    data = load_processed_data(processed_data_path)

    # Group games by rating differential and calculate win rates
    # (this also adds the differential columns to `data`)
    print("Grouping games by rating differential...")
    differential_stats = group_by_rating_differential(data, username)

    # Print stats to console
    print("Win Rate by Rating Differential:")
    print(differential_stats)

    # Save path for the plot
    differential_plot_path = "./reports/figures/ardil30_rating_differential_stats.png"

    # Plot and save rating differential statistics
    print("Plotting rating differential statistics...")
    plot_stats(
        differential_stats,
        "Win Rate by Rating Differential",
        "Rating Differential",
        "Win Rate (%)",
        differential_plot_path
    )
    print("Analysis complete!")


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import re


def load_processed_data(file_path):
    """
    Parse the processed games JSON file and hand it back as a DataFrame.
    """
    loaded = pd.read_json(file_path)
    return loaded


def to_seconds(time_str):
    """
    Convert a colon-separated clock string to seconds.

    Accepted forms are "ss", "mm:ss" and "hh:mm:ss"; every field may carry
    a fractional part (e.g. "0:02:59.9").

    Args:
        time_str: Clock string to convert.

    Returns:
        float: Total number of seconds, or None when the string has more
        than three colon-separated fields.

    Raises:
        ValueError: If a field is not a valid number.
    """
    parts = [float(field) for field in time_str.split(":")]
    if len(parts) > 3:
        return None

    # Fold left to right: each further field scales the running total by 60.
    # This also covers a seconds-only value, which used to return None.
    total = 0.0
    for value in parts:
        total = total * 60 + value
    return total


def extract_time_per_move(moves, player_color):
    """
    Average the clock time one side spent per move, based on the
    [%clk ...] annotations found in the move text.

    Returns None when fewer than two clock readings exist for that side or
    when no consecutive pair shows a decreasing clock. Raises ValueError
    for a colour other than 'white' or 'black' (case-insensitive).
    """
    color = player_color.lower()
    if color not in ("white", "black"):
        raise ValueError("player_color must be 'white' or 'black'.")

    # White moves follow "N. ", black moves follow "N... ".
    if color == "white":
        clock_pattern = r"[^\.]\.\s[^\[]+\[%clk\s([0-9:.]+)\]"
    else:
        clock_pattern = r"\.\.\.\s[^\[]+\[%clk\s([0-9:.]+)\]"
    readings = re.findall(clock_pattern, moves)

    if len(readings) < 2:  # a single reading gives no difference to measure
        return None

    clocks = [to_seconds(reading) for reading in readings]

    # Time spent on a move is the drop between consecutive readings; pairs
    # where the clock did not go down are skipped.
    spent = [
        earlier - later
        for earlier, later in zip(clocks, clocks[1:])
        if earlier > later
    ]
    if not spent:
        return None
    return sum(spent) / len(spent)


def analyze_time_management(data, username):
    """
    Average the player's time per move for every (time class, result) pair.

    Games whose "Time Class" is "daily" (case-insensitive) are excluded, as
    are games for which no per-move time could be derived.

    Args:
        data: DataFrame with the columns "Time Class", "White Player",
            "Moves" and "Result".
        username: Player whose moves are analysed. The player is taken to
            be White when "White Player" equals username
            (case-insensitively), otherwise Black.

    Returns:
        DataFrame with the columns "Time Class", "Result" and
        "Average Time Per Move". It is empty, but still has these columns,
        when no game yields timing data.
    """
    columns = ["Time Class", "Result", "Average Time Per Move"]
    target = username.lower()
    records = []

    for _, row in data.iterrows():
        if row["Time Class"].lower() == "daily":
            continue  # Exclude Daily games

        player_color = "white" if row["White Player"].lower() == target else "black"
        avg_time_per_move = extract_time_per_move(row["Moves"], player_color)

        if avg_time_per_move is not None:
            records.append({
                "Time Class": row["Time Class"],
                "Result": row["Result"],
                "Average Time Per Move": avg_time_per_move,
            })

    if not records:
        # A column-less frame would make the groupby below raise KeyError.
        return pd.DataFrame(columns=columns)

    time_data = pd.DataFrame(records, columns=columns)

    # Mean of the per-game averages for each (Time Class, Result) pair.
    return time_data.groupby(["Time Class", "Result"]).agg({
        "Average Time Per Move": "mean"
    }).reset_index()


def plot_time_management(data, save_path):
    """
    Plot average time per move for all time formats in one grouped bar chart.

    One bar is drawn per (time class, result) pair for the results "Loss",
    "Draw" and "Win"; rows with any other result are not plotted.

    Args:
        data: DataFrame with the columns "Time Class", "Result" and
            "Average Time Per Move".
        save_path: Path of the PNG file to write. Its directory is created
            when missing.

    Returns:
        None. Nothing is plotted or written when ``data`` is empty.
    """
    if data.empty:
        # max() over an empty column would raise further down.
        print("No time management data to plot.")
        return

    time_classes = data["Time Class"].unique()
    result_order = ["Loss", "Draw", "Win"]  # left-to-right order within a group
    result_colors = {"Win": "green", "Loss": "red", "Draw": "blue"}

    # Each time class gets an integer x position; the three result bars are
    # placed one bar width to the left of, on, and to the right of it.
    bar_width = 0.2
    x_positions = {time_class: i for i, time_class in enumerate(time_classes)}
    offset_map = {result: (idx - 1) * bar_width for idx, result in enumerate(result_order)}

    plt.figure(figsize=(12, 6))
    for result in result_order:
        subset = data[data["Result"] == result]
        x_values = [x_positions[time_class] + offset_map[result] for time_class in subset["Time Class"]]
        bars = plt.bar(
            x_values, subset["Average Time Per Move"], bar_width,
            label=result, color=result_colors[result]
        )

        # Annotate every bar with its value
        for bar in bars:
            plt.text(
                bar.get_x() + bar.get_width() / 2,
                bar.get_height(),
                f"{bar.get_height():.2f} sec",
                ha="center",
                fontsize=10
            )

    # Configure plot aesthetics
    plt.xticks(
        [x_positions[time_class] for time_class in time_classes],
        time_classes, fontsize=12
    )
    plt.ylim(0, data["Average Time Per Move"].max() * 1.1)  # Add some padding
    plt.title("Average Time Per Move by Game Outcome", fontsize=16)
    plt.xlabel("Time Format", fontsize=12)
    plt.ylabel("Average Time Per Move (seconds)", fontsize=12)
    plt.legend(title="Game Outcome", fontsize=10)
    plt.tight_layout()

    # os.makedirs("") raises, so only create a directory when the path has one.
    output_dir = os.path.dirname(save_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    plt.savefig(save_path, format="png")
    print(f"Plot saved to {save_path}")
    plt.close()


# Script entry point: load the processed games, compute the average time
# per move by time class and result, print the table and save a chart.
if __name__ == "__main__":
    # File path for processed data
    processed_data_path = "./data/processed/ardil30_games.json"
    username = "ardil30"  # Replace with your Chess.com username

    # Load processed data
    print("Loading processed game data...")
    data = load_processed_data(processed_data_path)

    # Analyze time management
    print("Analyzing time management...")
    time_management_stats = analyze_time_management(data, username)
    print("Time Management Statistics:")
    print(time_management_stats)

    # Save path for the plot
    time_management_plot_path = "./reports/figures/ardil30_time_management.png"

    # Plot and save time management statistics
    print("Plotting time management statistics...")
    plot_time_management(time_management_stats, time_management_plot_path)
    print("Analysis complete!")


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def load_data(file_path):
    """
    Read the processed JSON file into a pandas DataFrame.
    """
    frame = pd.read_json(file_path)
    return frame

def prepare_trend_data(data, username, window_size=20):
    """
    Build a time-ordered table of the player's rating with a moving average.

    The player is taken to be White when 'White Player' equals username,
    compared case-insensitively (the same rule the rating-differential and
    time-management code uses), otherwise Black.

    Side effect: 'End Time' is converted to datetime and the columns
    'Player Rating' and 'Opponent Rating' are added to ``data`` in place.

    Args:
        data: DataFrame with the columns 'End Time', 'Time Class',
            'White Player', 'White Rating' and 'Black Rating'.
        username: Player whose rating is tracked.
        window_size: Number of games in the moving-average window.

    Returns:
        DataFrame sorted by 'End Time' with the columns 'End Time',
        'Time Class', 'Player Rating', 'Smoothed Rating', 'Opponent Rating'.
    """
    # Convert end time to datetime
    data['End Time'] = pd.to_datetime(data['End Time'])

    # Vectorised colour test; unlike a row-wise DataFrame.apply it also
    # works when ``data`` has no rows.
    is_white = data['White Player'].str.lower() == username.lower()
    data['Player Rating'] = data['White Rating'].where(is_white, data['Black Rating'])
    data['Opponent Rating'] = data['Black Rating'].where(is_white, data['White Rating'])

    # Sort by time (returns a new frame; the caller's row order is untouched)
    data = data.sort_values('End Time')

    # Moving average computed separately within each time class; min_periods=1
    # gives the first games a value before the window is full.
    data['Smoothed Rating'] = data.groupby('Time Class')['Player Rating'].transform(
        lambda ratings: ratings.rolling(window=window_size, min_periods=1).mean()
    )

    return data[['End Time', 'Time Class', 'Player Rating', 'Smoothed Rating', 'Opponent Rating']]

def plot_time_control(trend_data, time_class, save_path):
    """
    Plot the smoothed rating over time for one time control and save it as a PNG.

    When trend_data has no rows for time_class a message is printed and
    nothing is plotted or written. The directory part of save_path is
    created when missing.
    """
    rows = trend_data[trend_data['Time Class'] == time_class]

    # Nothing to draw for this time control.
    if rows.empty:
        print(f"No data found for time class: {time_class}")
        return

    display_name = time_class.capitalize()

    plt.figure(figsize=(12, 6))
    plt.plot(rows['End Time'], rows['Smoothed Rating'], label=f"{display_name} Rating", color='blue')
    plt.title(f"{display_name} Rating Trends Over Time (Smoothed)", fontsize=16)
    plt.xlabel("Date", fontsize=12)
    plt.ylabel("Rating", fontsize=12)
    plt.grid(True)
    plt.tight_layout()

    # Persist the figure, then release it.
    target_dir = os.path.dirname(save_path)
    os.makedirs(target_dir, exist_ok=True)
    plt.savefig(save_path, format="png")
    print(f"Plot saved to {save_path}")
    plt.close()

# Script entry point: load the processed games, build the smoothed rating
# trend table and save one line chart per time control.
if __name__ == "__main__":
    username = "ardil30"  # Replace with your Chess.com username
    processed_data_path = f"./data/processed/{username}_games.json"

    # Paths for each time control's plot
    blitz_plot_path = f"./reports/figures/{username}_blitz_rating_trends.png"
    bullet_plot_path = f"./reports/figures/{username}_bullet_rating_trends.png"
    rapid_plot_path = f"./reports/figures/{username}_rapid_rating_trends.png"

    # Load processed data
    data = load_data(processed_data_path)

    # Debugging: Print loaded data
    print("Loaded Data (Sample):")
    print(data.head())

    # Prepare trend data with smoothing (uses the default window_size)
    trend_data = prepare_trend_data(data, username)

    # Debugging: Print trend data
    print("Trend Data (Sample):")
    print(trend_data.head())

    # Plot and save rating trends for each time control.
    # The time class strings are compared exactly against the 'Time Class' column.
    plot_time_control(trend_data, "Blitz", blitz_plot_path)
    plot_time_control(trend_data, "Bullet", bullet_plot_path)
    plot_time_control(trend_data, "Rapid", rapid_plot_path)
