In [None]:
PLAYER_NAME = "Dave"                    # Player of interest
ANCHOR_ENGINE = "stockfish"             # Anchor engine: the single-engine model uses it, and other engines' ACPL are mapped onto its scale
ENGINES = [ANCHOR_ENGINE, "komodo"]     # Engines for which to retrieve game analyses
QUANTILE = 0.05                         # Fraction of games treated as the best and the worst (0.05 = best/worst 5%)
R_MIN = 800                             # Floor for the worst performances
R_MAX = 2300                            # Ceiling for the best performances
EPSILON = 0.01                          # How close the worst games get to R_MIN: they score R_MIN + EPSILON * (R_MAX - R_MIN)

In [None]:
%run constants.ipynb
%run database.ipynb
%run analysis.ipynb
%run pathutils.ipynb
%run export.ipynb

In [None]:
from scipy.stats import gaussian_kde
import numpy as np
import matplotlib.pyplot as plt

def plot_kde(df, engine):
    """
    Helper method to plot a KDE (Kernel Density Estimate) chart of ACPL values for a given engine, showing the distribution
    of ACPL values for that engine

    The curve is drawn on the current matplotlib axes and labelled with the engine name. Missing ACPL values are
    ignored. If fewer than two values remain, or all remaining values are identical, no density can be estimated, so a
    warning is issued and nothing is drawn

    Parameters:
        df: DataFrame with (at least) an "engine" column and an "acpl" column
        engine: Value of the "engine" column selecting the rows to plot

    Returns:
        None
    """
    import warnings

    vals = df.loc[df["engine"] == engine, "acpl"].dropna().to_numpy(dtype=float)

    # gaussian_kde raises on empty/single-element data and on zero-variance data, so skip those cases explicitly
    if vals.size < 2 or vals.min() == vals.max():
        warnings.warn(f"Not enough distinct ACPL values to plot a KDE for engine '{engine}'")
        return

    kde = gaussian_kde(vals)

    # Evaluate the density on an evenly spaced grid spanning the observed ACPL range
    x = np.linspace(vals.min(), vals.max(), 400)
    plt.plot(x, kde(x), label=engine)

## Single Engine Model

1. Generate the ACPL data set for the specified player and engines
2. Extract the ACPL values for the single engine of interest
3. Determine the best and worst games
4. Calculate K

In [None]:
import numpy as np

# Generate the ACPL data set
# NOTE(review): the meaning of the final argument (1) is defined by generate_acpl_for_player() in a helper
# notebook and is not visible here - confirm and consider naming it in the configuration cell
connection = connect()
player_acpl_df = generate_acpl_for_player(connection, PLAYER_NAME, ENGINES, 1)

# Compute empirical CDF positions for later validation: the percentile rank of each game's ACPL
# among the games analysed by the same engine
player_acpl_df["acpl_quantile"] = (
    player_acpl_df
    .groupby("engine")["acpl"]
    .rank(pct=True)
)

# Extract the ACPL values for the anchor engine (a copy, so columns added later do not touch player_acpl_df)
anchor_df = player_acpl_df[player_acpl_df["engine"] == ANCHOR_ENGINE].copy()

# Fail fast: without anchor data every value calculated below would silently be NaN
if anchor_df.empty:
    raise ValueError(f"No ACPL data found for {PLAYER_NAME} using anchor engine {ANCHOR_ENGINE}")

# Get the best and worst Q% of games (a low ACPL is a good game, so the best games sit at the low quantile)
ACPL_best_anchor = anchor_df["acpl"].quantile(QUANTILE)
ACPL_worst_anchor = anchor_df["acpl"].quantile(1.00 - QUANTILE)

# K divides by the spread between the worst and best games, so the spread must be strictly positive.
# "not > 0" also rejects NaN
acpl_spread_anchor = ACPL_worst_anchor - ACPL_best_anchor
if not acpl_spread_anchor > 0:
    raise ValueError(
        f"Cannot calculate K for {ANCHOR_ENGINE}: ACPL(worst) = {ACPL_worst_anchor} must be greater than "
        f"ACPL(best) = {ACPL_best_anchor}"
    )

# K defines how quickly playing performance drops as ACPL increases. It is chosen so that a game with
# ACPL(worst) is rated R_MIN + EPSILON * (R_MAX - R_MIN)
K_ANCHOR = np.log(1/EPSILON) / acpl_spread_anchor

print(f'ACPL(best) is {ACPL_best_anchor}')
print(f"K for games played by {PLAYER_NAME} and anlysed using {ANCHOR_ENGINE} is {K_ANCHOR}")

In [None]:
def playing_performance_single_engine(acpl, ACPL_best=None):
    """
    Estimate playing performance from anchor-engine ACPL using the exponential decay model:

        R_MIN + (R_MAX - R_MIN) * exp(-K_ANCHOR * (acpl - ACPL_best))

    Parameters:
        acpl: ACPL value(s) for which to estimate performance (a scalar, NumPy array or pandas Series)
        ACPL_best: ACPL at which the estimate equals R_MAX. Defaults to the current value of ACPL_best_anchor

    Returns:
        The performance estimate(s), with the same shape as "acpl"
    """
    # Resolve the default at call time. K_ANCHOR is read when the function is called, so binding
    # ACPL_best_anchor at definition time could pair a stale ACPL_best with a fresh K after the
    # parameters cell is re-run
    if ACPL_best is None:
        ACPL_best = ACPL_best_anchor
    return R_MIN + (R_MAX - R_MIN) * np.exp(-K_ANCHOR * (acpl - ACPL_best))

anchor_df["performance_single"] = playing_performance_single_engine(anchor_df["acpl"])
display(anchor_df)

## Multi-Engine Model

The ACPL calculation uses CPL values from the move analysis from each engine. That CPL is calculated on an internal scale specific to the engine, so ACPL values calculated from analyses completed by different engines are not directly comparable. To make them comparable, we need to:

1. Calculate the mean (average, or typical) ACPL for the anchor engine
2. Calculate the standard deviation (spread) of ACPL for the anchor engine
3. Use the Z-score to "map" the reported ACPL for an engine onto the same scale as the anchor engine

In [None]:
# Compute the mean and standard deviation for ACPL calculated from the move analysis CPL values for the anchor engine
sf_stats = player_acpl_df[player_acpl_df["engine"] == ANCHOR_ENGINE]["acpl"].agg(["mean", "std"])
sf_mean, sf_std = sf_stats["mean"], sf_stats["std"]

# Turn each engine's z-score into an anchor-engine-equivalent ACPL
# NOTE(review): "acpl_z" is not created in this notebook - presumably generate_acpl_for_player() supplies it as a
# per-engine z-score of ACPL; confirm against the helper notebook
player_acpl_df["acpl_multi"] = sf_mean + player_acpl_df["acpl_z"] * sf_std

# Get the best and worst Q% of games based on the "global" score
ACPL_best_multi = player_acpl_df["acpl_multi"].quantile(QUANTILE)
ACPL_worst_multi = player_acpl_df["acpl_multi"].quantile(1.00 - QUANTILE)

# K divides by the spread between the worst and best games, so the spread must be strictly positive.
# "not > 0" also rejects NaN (e.g. no anchor data, or a single anchor game giving an undefined standard deviation)
acpl_spread_multi = ACPL_worst_multi - ACPL_best_multi
if not acpl_spread_multi > 0:
    raise ValueError(
        f"Cannot calculate the multi-engine K: ACPL(worst) = {ACPL_worst_multi} must be greater than "
        f"ACPL(best) = {ACPL_best_multi}"
    )

# Chosen so that a game with ACPL(worst) is rated R_MIN + EPSILON * (R_MAX - R_MIN)
K_MULTI = np.log(1/EPSILON) / acpl_spread_multi

display(player_acpl_df)

Having calculated "K_MULTI" and determined "ACPL_best_multi", the function playing_performance_multi_engine() provides a way of estimating playing performance for a game whose anchor-equivalent ACPL is equal to "acpl_multi"

In [None]:
def playing_performance_multi_engine(acpl_multi, ACPL_best=None):
    """
    Estimate playing performance from anchor-equivalent ACPL using the exponential decay model:

        R_MIN + (R_MAX - R_MIN) * exp(-K_MULTI * (acpl_multi - ACPL_best))

    Parameters:
        acpl_multi: Anchor-equivalent ACPL value(s) (a scalar, NumPy array or pandas Series)
        ACPL_best: ACPL at which the estimate equals R_MAX. Defaults to the current value of ACPL_best_multi

    Returns:
        The performance estimate(s), with the same shape as "acpl_multi"
    """
    # Resolve the default at call time. K_MULTI is read when the function is called, so binding
    # ACPL_best_multi at definition time could pair a stale ACPL_best with a fresh K after the
    # parameters cell is re-run
    if ACPL_best is None:
        ACPL_best = ACPL_best_multi
    return R_MIN + (R_MAX - R_MIN) * np.exp(-K_MULTI * (acpl_multi - ACPL_best))

player_acpl_df["performance_multi"] = playing_performance_multi_engine(player_acpl_df["acpl_multi"])
display(player_acpl_df.head())

A smoothed density plot shows the structure of ACPL values more clearly than a histogram and helps to motivate the exponential decay model

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Anchor-engine ACPL values as a NumPy array
acpl = anchor_df["acpl"].values

# Fit the kernel density estimate and evaluate it on an even grid across the observed ACPL range
kde = gaussian_kde(acpl)
x = np.linspace(acpl.min(), acpl.max(), 400)
y = kde(x)

# Draw the density curve on a dedicated figure
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(x, y, linewidth=2)
ax.set(
    xlabel="ACPL",
    ylabel="Density",
    title=f"{ANCHOR_ENGINE} ACPL Distribution",
)

# Export the chart before showing it
export_chart("acpl-distribution", ANCHOR_ENGINE, "png")

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Evaluate the fitted multi-engine curve over the full range of anchor-equivalent ACPL values
acpl_range = np.linspace(player_acpl_df["acpl_multi"].min(), player_acpl_df["acpl_multi"].max(), 200)

# The scatter shows multi-engine estimates against acpl_multi, so the curve must come from the multi-engine
# model (K_MULTI, ACPL_best_multi) and not from the single-engine one
performance_curve = playing_performance_multi_engine(acpl_range)

plt.figure(figsize=(12, 5))
plt.scatter(player_acpl_df["acpl_multi"], player_acpl_df["performance_multi"], alpha=0.3, label="Games")
plt.plot(acpl_range, performance_curve, linewidth=2, label="Fitted curve")
plt.xlabel("ACPL")
plt.ylabel("Personal Performance Estimate")
plt.title(f"Multi-Engine Performance Model (Anchor = {ANCHOR_ENGINE})")
plt.legend()

# Export the chart
export_chart("multi-engine-model", ANCHOR_ENGINE, "png")

plt.show()

## Model Comparison Chart

In [None]:
# Merge in the single engine performance ratings. player_acpl_df holds one row per game and engine, so each
# row is paired with the single-engine estimate for the same game; games with no anchor-engine row are
# dropped by the inner join
compare_df = player_acpl_df.merge(
    anchor_df[["game_id", "performance_single"]],
    on="game_id",
    how="inner"
)

plt.figure(figsize=(12, 5))

plt.scatter(compare_df["performance_single"],
            compare_df["performance_multi"],
            alpha=0.4)

plt.xlabel("Single-Engine Performance")
plt.ylabel("Multi-Engine Performance")
plt.title("Single Engine vs Multi-Engine Model Comparison")

# Draw the X = Y reference line, anchored at the configured rating floor rather than a hard-coded value so it
# follows any change to R_MIN
plt.axline((R_MIN, R_MIN), slope=1, linestyle="--")

# Export the chart
export_chart("model-comparison", "", "png")

plt.show()

## KDE Overlay

The KDE plots show the distribution of ACPL values produced by each engine. Horizontal offsets reflect engine-specific evaluation scales, while the overall shape and tail behaviour indicate how engines assess relative game quality

In [None]:
from scipy.stats import gaussian_kde
import numpy as np
import matplotlib.pyplot as plt

# One shared set of axes; plot_kde() draws each engine's density curve onto the current axes
fig, ax = plt.subplots(figsize=(12, 5))
for engine in ENGINES:
    plot_kde(player_acpl_df, engine)

ax.set(
    xlabel="ACPL",
    ylabel="Density",
    title="ACPL Distributions by Engine",
)
ax.legend()

# Export the chart before showing it
export_chart("multi-engine-acpl-kde-overlay", "", "png")

plt.show()

## Game-by-Game Comparison

Expect a tight diagonal cloud around the reference line in the chart if the engines mostly agree

In [None]:
# Reshape to one row per game and one ACPL column per engine
acpl_by_engine = player_acpl_df.pivot_table(index="game_id", columns="engine", values="acpl")

# Keep only the games that have an ACPL value for every configured engine
game_by_game_df = acpl_by_engine.dropna(subset=ENGINES)

display(game_by_game_df.head())

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Every engine column except the anchor is compared against the anchor
engines = [column for column in game_by_game_df.columns if column != ANCHOR_ENGINE]

fig, ax = plt.subplots(figsize=(12, 5))

# One scatter series per non-anchor engine, all against the anchor's ACPL on the x axis
for eng in engines:
    ax.scatter(game_by_game_df[ANCHOR_ENGINE], game_by_game_df[eng], alpha=0.35, label=eng)

ax.set(
    xlabel=f"{ANCHOR_ENGINE} ACPL",
    ylabel="Other engine ACPL",
    title=f"Same-Game ACPL: {ANCHOR_ENGINE} vs others",
)

# y=x reference line spanning the smallest to the largest ACPL found in any plotted column
all_vals = game_by_game_df[[ANCHOR_ENGINE] + engines].to_numpy()
vmin, vmax = np.nanmin(all_vals), np.nanmax(all_vals)
ax.plot([vmin, vmax], [vmin, vmax], linestyle="--")

ax.legend()
plt.show()

## Z-Score Correlation

The Z-score correlation answers the question "When one engine says a game was relatively good or bad for you, does the other engine agree?"

| Correlation | Interpretation            | Approach Validity                      |
| ----------- | ------------------------- | -------------------------------------- |
| >= 0.8      | Very strong agreement     | Z-score normalisation is valid         |
| 0.7 - 0.8   | Strong agreement          | Z-score normalisation is valid         |
| 0.65 - 0.7  | Borderline but acceptable | Use Z-score normalisation with caution |
| 0.6 - 0.65  | Weak agreement            | Use single engine approach             |
| < 0.6       | Poor agreement            | Use single engine approach             |

In [None]:
# Reshape to one row per game and one z-score column per engine
z_scores_by_engine = player_acpl_df.pivot_table(index="game_id", columns="engine", values="acpl_z")

# Only games scored by every configured engine can contribute to the correlation
correlation_df = z_scores_by_engine.dropna(subset=ENGINES)

# Pairwise correlation between the engines' z-scores, displayed as the cell output
correlation_df.corr()

## Normalisation Validation

If the curves align post-normalisation, this suggests that engines agree not only on ranking but also on the relative extremeness of good and bad games

In [None]:
# Evaluate every engine's density on the same grid: with a per-engine x range the curves could look
# misaligned even when the distributions match
global_min = player_acpl_df["acpl_multi"].min()
global_max = player_acpl_df["acpl_multi"].max()
x = np.linspace(global_min, global_max, 400)

fig, ax = plt.subplots(figsize=(12, 5))

for engine in ENGINES:
    vals = player_acpl_df.loc[player_acpl_df["engine"] == engine, "acpl_multi"].dropna()
    kde = gaussian_kde(vals)

    # The anchor engine gets a heavier line; the other engines are drawn semi-transparent
    line_style = {"linewidth": 3} if engine == ANCHOR_ENGINE else {"alpha": 0.7}
    ax.plot(x, kde(x), label=engine, **line_style)

ax.set(
    xlabel="ACPL",
    ylabel="Density",
    title="ACPL Distributions by Engine",
)
ax.legend()

# Export the chart before showing it
export_chart("multi-engine-normalisation-validation", "", "png")

plt.show()

## Data Export (Including Validation Data)

In [None]:
import pandas as pd

# Model parameters, one row per model. Building the frame in a single call (rather than appending rows to an
# empty frame) lets pandas infer numeric dtypes for the numeric columns
parameters_df = pd.DataFrame(
    [
        [ANCHOR_ENGINE, R_MIN, R_MAX, K_ANCHOR, ACPL_best_anchor],
        ["Multi-Engine", R_MIN, R_MAX, K_MULTI, ACPL_best_multi],
    ],
    columns=["Context", "R_Min", "R_Max", "K", "ACPL_best"],
)

# Pairwise correlation of the per-engine z-scores, exported alongside the data it validates
correlation_matrix = correlation_df.corr()

# Write the parameters, both performance data sets and the validation tables to a spreadsheet; each
# dictionary key labels the data set passed alongside it
export_to_spreadsheet(f"multi-engine-model-{ANCHOR_ENGINE}", {
    "Parameters": parameters_df,
    f"{ANCHOR_ENGINE} Performance": anchor_df,
    "Multi-Engine Performance": player_acpl_df,
    "Game-By-Game": game_by_game_df,
    "Z-Score Correlation": correlation_matrix
})