In [2]:
import pandas as pd
import statistics
import math


def calculate_summary_statistics(analysis_df):
    """
    Summarise the move-by-move analysis of a game into one row per player.

    For each of WHITE and BLACK the summary holds the average centipawn loss (ACPL), the
    overall % accuracy (harmonic mean of the per-move accuracies that are greater than
    zero), an ELO estimate derived from the ACPL, and the number of moves annotated as
    dubious ("?!"), a mistake ("?") or a blunder ("??").

    Values that cannot be calculated are reported as NaN instead of raising
    statistics.StatisticsError: ACPL, accuracy and ELO for a player with no rows in the
    analysis, and accuracy for a player with no per-move accuracy above zero.

    :param analysis_df: Data frame of per-move analysis with the columns "game_id",
        "engine", "depth", "player", "cpl", "accuracy" and "annotation". The game ID,
        engine and depth are read from the first row only, so every row is assumed to
        belong to the same game, engine and depth
    :return: Data frame with one row per player (WHITE first, then BLACK) and the columns
        "game_id", "engine", "depth", "player", "acpl", "accuracy", "elo", "dubious",
        "mistakes" and "blunders"
    :raises IndexError: If analysis_df has no rows
    """
    # Extract the game ID, engine name and depth, taken from the first row
    first_row = analysis_df.iloc[0]
    game_id = first_row["game_id"]
    engine = first_row["engine"]
    depth = first_row["depth"]

    summary_columns = [
        "game_id",
        "engine",
        "depth",
        "player",
        "acpl",
        "accuracy",
        "elo",
        "dubious",
        "mistakes",
        "blunders"]

    not_available = float("nan")
    summary_rows = []

    for player in [WHITE, BLACK]:
        # Extract the analysis for this player
        player_analysis_df = analysis_df[analysis_df["player"] == player]
        cp_losses = player_analysis_df["cpl"].tolist()
        # Only accuracies above zero contribute to the overall accuracy
        accuracies = player_analysis_df.loc[player_analysis_df["accuracy"] > 0, "accuracy"].tolist()
        annotations = player_analysis_df["annotation"].tolist()

        # Calculate the ACPL and overall accuracy. The statistics functions raise
        # StatisticsError on empty data, so fall back to NaN when there is nothing to average
        acpl = statistics.mean(cp_losses) if cp_losses else not_available
        accuracy = statistics.harmonic_mean(accuracies) if accuracies else not_available

        # Estimate the ELO from the ACPL: 3100 at an ACPL of zero, decaying exponentially.
        # NOTE(review): this approximation is usually quoted with a decay of 0.01 rather
        # than 0.001 - confirm the constant is intended
        elo_estimate = 3100.0 * math.exp(-0.001 * acpl)
        # int() cannot convert NaN, so an unknown ACPL gives an unknown ELO
        elo = not_available if math.isnan(elo_estimate) else int(elo_estimate)

        # Build the whole row, including the counts for each annotation type
        summary_rows.append({
            "game_id": game_id,
            "engine": engine,
            "depth": depth,
            "player": player,
            "acpl": acpl,
            "accuracy": accuracy,
            "elo": elo,
            "dubious": annotations.count("?!"),
            "mistakes": annotations.count("?"),
            "blunders": annotations.count("??")
        })

    # Build the data frame once rather than growing it a row at a time
    return pd.DataFrame(summary_rows, columns=summary_columns)

1. Load the game IDs for all games involving the player of interest
2. Calculate the summary statistics, which include the ACPL, for each game
3. Build a data frame containing the ACPL values, and their per-engine z-scores (`acpl_z`), for the player of interest:

| index | game_id | engine    | depth | acpl       | acpl_z    |
| ----- | ------- | --------- | ----- | ---------- | --------- |
| 0     | 1       | stockfish | 0     | 91.680000  | 1.390843  |
| 1     | 2       | stockfish | 0     | 112.692308 | 2.093180  |
| 2     | 3       | stockfish | 0     | 22.057692  | -0.936284 |
| 3     | 4       | stockfish | 0     | 79.285714  | 0.976564  |
| 4     | 5       | stockfish | 0     | 67.312500  | 0.576359  |


In [3]:
import pandas as pd

def generate_acpl_for_player(connection, player_name, engines, minimum_games):
    """
    Collect a player's ACPL for each of their games and z-score it within each engine.

    Every game returned by load_player_game_ids is paired with every engine of interest.
    Pairs for which load_analysis returns no analysis (None or an empty data frame) are
    skipped, so an engine may end up with fewer rows than there are games; engines left
    with fewer than minimum_games rows are then dropped entirely.

    :param connection: Passed through unchanged to load_player_game_ids and load_analysis
    :param player_name: Name of the player of interest, passed to load_player_game_ids
    :param engines: Iterable of engine identifiers, each passed to load_analysis
    :param minimum_games: Minimum number of rows an engine needs to be kept
    :return: Data frame with the columns "game_id", "engine", "depth", "acpl" and
        "acpl_z", where "acpl_z" is the ACPL standardised against the mean and sample
        standard deviation of the same engine's rows. It is NaN when that standard
        deviation is undefined or zero (a single row, or identical ACPL values)
    :raises IndexError: If a game's summary has no row for the player's colour
    """
    # Load the games the player of interest has participated in
    games_df = load_player_game_ids(connection, player_name)

    # Collect the analysis results for the player of interest, one row per game and engine
    acpl_columns = ["game_id", "engine", "depth", "acpl"]
    acpl_rows = []

    # Iterate over the games
    for row in games_df.itertuples(index=False):
        # Extract the game ID and player colour for this game
        game_id = row.id
        player = row.Colour

        # Iterate over the engines of interest
        for engine in engines:
            # Load the analysis, skipping games this engine has not analysed. Without
            # this the summary calculation fails on the first missing analysis
            analysis_df = load_analysis(connection, game_id, engine)
            if analysis_df is None or analysis_df.empty:
                continue

            # Generate the summary and extract the row for the player's colour
            summary_df = calculate_summary_statistics(analysis_df)
            player_summary = summary_df[summary_df["player"] == player].iloc[0]
            acpl_rows.append([player_summary[column] for column in acpl_columns])

    # Build the data frame once rather than growing it a row at a time
    player_acpl_df = pd.DataFrame(acpl_rows, columns=acpl_columns)

    # Drop engines with too few games
    engine_counts = player_acpl_df["engine"].value_counts()
    valid_engines = engine_counts[engine_counts >= minimum_games].index
    player_acpl_df = player_acpl_df[player_acpl_df["engine"].isin(valid_engines)].copy()

    # Calculate the z-score within each engine
    player_acpl_df["acpl_z"] = player_acpl_df.groupby("engine")["acpl"].transform(
        lambda x: (x - x.mean()) / x.std()
    )

    # Return the data
    return player_acpl_df