In [None]:
# Notebook configuration: inputs to the personal Elo model fitted in the cells below.
PLAYER_NAME = "Dave"                    # Player of interest
ANCHOR_ENGINE = "stockfish"             # Anchor engine
ENGINES = [ANCHOR_ENGINE, "komodo"]     # Engines for which to retrieve game analyses
QUANTILE = 0.05                         # Quantile fraction Q: ACPL(best) is the Q quantile, ACPL(worst) the (1 - Q) quantile
R_MIN = 800                             # Floor for the worst performances
R_MAX = 2300                            # Ceiling for the best performances
# How close the worst games get to R_MIN: K is chosen so that a game at
# ACPL(worst) is rated R_MIN + EPSILON * (R_MAX - R_MIN)
EPSILON = 0.01

In [None]:
%run constants.ipynb
%run database.ipynb
%run analysis.ipynb
%run pathutils.ipynb
%run export.ipynb

## Single Engine Model

1. Generate the ACPL data set for the specified player and engines
2. Extract the ACPL values for the single engine of interest
3. Determine the best and worst games
4. Calculate K

In [None]:
import numpy as np

# Generate the ACPL data set for the player across all configured engines.
# NOTE(review): the meaning of the trailing positional argument (1) is defined by
# generate_acpl_for_player() in one of the %run notebooks - confirm there.
connection = connect()
player_acpl_df = generate_acpl_for_player(connection, PLAYER_NAME, ENGINES, 1)

# Extract the rows for the anchor engine; .copy() so that columns added to
# anchor_df later do not touch player_acpl_df
anchor_df = player_acpl_df[player_acpl_df["engine"] == ANCHOR_ENGINE].copy()

# ACPL at the best and worst Q% cut points (the "best" cut is the low-ACPL end)
ACPL_best_anchor = anchor_df["acpl"].quantile(QUANTILE)
ACPL_worst_anchor = anchor_df["acpl"].quantile(1.00 - QUANTILE)

# An empty data set (NaN quantiles) or identical quantiles (zero spread) would make
# K NaN or infinite and silently poison every Elo estimate below, so fail loudly here
acpl_spread_anchor = ACPL_worst_anchor - ACPL_best_anchor
if not np.isfinite(acpl_spread_anchor) or acpl_spread_anchor <= 0:
    raise ValueError(
        f"Cannot fit K for {PLAYER_NAME} using {ANCHOR_ENGINE}: "
        f"ACPL(worst) - ACPL(best) is {acpl_spread_anchor}; "
        "a finite, positive spread is required"
    )

# K defines how quickly Elo drops as ACPL increases. It is chosen so that
# exp(-K * (ACPL_worst - ACPL_best)) == EPSILON
K_ANCHOR = np.log(1/EPSILON) / acpl_spread_anchor

print(f'ACPL(best) is {ACPL_best_anchor}')
print(f"K for games played by {PLAYER_NAME} and analysed using {ANCHOR_ENGINE} is {K_ANCHOR}")

In [None]:
def personal_elo_single_engine(acpl, ACPL_best=ACPL_best_anchor):
    """Estimate a personal Elo from anchor-engine ACPL using exponential decay.

    Evaluates R_MIN + (R_MAX - R_MIN) * exp(-K_ANCHOR * (acpl - ACPL_best)), so an
    ACPL equal to ACPL_best maps to R_MAX and, for positive K_ANCHOR, larger ACPL
    values decay towards R_MIN.

    Args:
        acpl: ACPL value or values; anything NumPy can broadcast (the notebook
            passes a pandas Series and a NumPy array).
        ACPL_best: ACPL that maps to R_MAX. The default is the value of
            ACPL_best_anchor at the time this cell was executed.

    Returns:
        The Elo estimate(s), with the same shape/type as ``acpl``.
    """
    elo_span = R_MAX - R_MIN
    acpl_excess = acpl - ACPL_best
    return R_MIN + elo_span * np.exp(-K_ANCHOR * acpl_excess)

# Rate every anchor-engine game and show the resulting table
anchor_df["elo_personal_single"] = personal_elo_single_engine(anchor_df["acpl"])
display(anchor_df)

## Multi-Engine Model

ACPL is calculated from the CPL values produced by each engine's move analysis. Each engine reports CPL on its own internal scale, so ACPL values calculated from analyses completed by different engines are not directly comparable. To make them comparable, we need to:

1. Calculate the mean (average, or typical) ACPL for the anchor engine
2. Calculate the standard deviation (spread) of ACPL for the anchor engine
3. Use the Z-score to "map" the reported ACPL for an engine onto the same scale as the anchor engine

In [None]:
# Compute the mean and standard deviation of ACPL for games analysed by the anchor engine
sf_stats = player_acpl_df.loc[player_acpl_df["engine"] == ANCHOR_ENGINE, "acpl"].agg(["mean", "std"])
sf_mean, sf_std = sf_stats["mean"], sf_stats["std"]

# Turn each engine's z-score into an anchor-equivalent ACPL.
# NOTE(review): the "acpl_z" column is not created in this notebook's visible cells -
# presumably generate_acpl_for_player() supplies it as a per-engine z-score; confirm.
player_acpl_df["acpl_multi"] = sf_mean + player_acpl_df["acpl_z"] * sf_std

# Get the best and worst Q% of games based on the "global" score
ACPL_best_multi = player_acpl_df["acpl_multi"].quantile(QUANTILE)
ACPL_worst_multi = player_acpl_df["acpl_multi"].quantile(1.00 - QUANTILE)

# NaN statistics (e.g. too few anchor games for a standard deviation) or identical
# quantiles would make K NaN or infinite and silently poison every Elo estimate
# below, so fail loudly here
acpl_spread_multi = ACPL_worst_multi - ACPL_best_multi
if not np.isfinite(acpl_spread_multi) or acpl_spread_multi <= 0:
    raise ValueError(
        f"Cannot fit the multi-engine K for {PLAYER_NAME}: "
        f"ACPL(worst) - ACPL(best) is {acpl_spread_multi}; "
        "a finite, positive spread is required"
    )

# K is chosen so that exp(-K * (ACPL_worst - ACPL_best)) == EPSILON
K_MULTI = np.log(1/EPSILON) / acpl_spread_multi

display(player_acpl_df)

Having calculated "K" and determined "ACPL_best", we can use the function personal_elo_multi_engine() to estimate the Elo for a game whose anchor-equivalent ACPL is "acpl_multi"

In [None]:
def personal_elo_multi_engine(acpl_multi, ACPL_best=ACPL_best_multi):
    """Estimate a personal Elo from anchor-equivalent ACPL using exponential decay.

    Evaluates R_MIN + (R_MAX - R_MIN) * exp(-K_MULTI * (acpl_multi - ACPL_best)),
    so an ACPL equal to ACPL_best maps to R_MAX and, for positive K_MULTI, larger
    ACPL values decay towards R_MIN.

    Args:
        acpl_multi: Anchor-equivalent ACPL value or values; anything NumPy can
            broadcast (the notebook passes a pandas Series).
        ACPL_best: ACPL that maps to R_MAX. The default is the value of
            ACPL_best_multi at the time this cell was executed.

    Returns:
        The Elo estimate(s), with the same shape/type as ``acpl_multi``.
    """
    elo_span = R_MAX - R_MIN
    acpl_excess = acpl_multi - ACPL_best
    return R_MIN + elo_span * np.exp(-K_MULTI * acpl_excess)

# Rate every game (all engines) and preview the first rows
player_acpl_df["elo_personal_multi"] = personal_elo_multi_engine(player_acpl_df["acpl_multi"])
display(player_acpl_df.head())

Export the per-game Elo ratings and model parameters to a spreadsheet

In [None]:
import pandas as pd

# One row of fitted parameters per model. The frame is built in a single call
# rather than grown row by row, so column dtypes are inferred from the data.
parameters_df = pd.DataFrame(
    [
        [ANCHOR_ENGINE, R_MIN, R_MAX, K_ANCHOR, ACPL_best_anchor],
        ["Multi-Engine", R_MIN, R_MAX, K_MULTI, ACPL_best_multi],
    ],
    columns=["Context", "R_Min", "R_Max", "K", "ACPL_best"],
)

# Export the parameters and both per-game rating tables. The dictionary passed to
# export_to_spreadsheet() is keyed by name, with one DataFrame per entry.
export_to_spreadsheet(f"multi-engine-model-{ANCHOR_ENGINE}", {
    "Parameters": parameters_df,
    f"{ANCHOR_ENGINE} Engine Elo": anchor_df,
    "Multi-Engine Elo": player_acpl_df
})

A smoothed density plot shows the structure of ACPL values more clearly than a histogram and helps to motivate the exponential decay model

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Anchor-engine ACPL values as a plain array for the density estimate
acpl = anchor_df["acpl"].values

# Fit a Gaussian kernel density estimate and evaluate it on an evenly spaced
# grid of 400 points spanning the observed ACPL range
kde = gaussian_kde(acpl)
x = np.linspace(acpl.min(), acpl.max(), num=400)
y = kde.evaluate(x)

# Draw the density curve on a fresh figure
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, y, linewidth=2)
ax.set_xlabel("ACPL")
ax.set_ylabel("Density")
ax.set_title(f"{ANCHOR_ENGINE} ACPL Distribution")

# Export the chart (before plt.show(), while the figure is still current)
export_chart("acpl-distribution", ANCHOR_ENGINE, "png")

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Evaluate the multi-engine model on an evenly spaced grid across the observed
# anchor-equivalent ACPL range. The curve must come from personal_elo_multi_engine()
# (K_MULTI / ACPL_best_multi) because the scattered points below were rated with it;
# the single-engine function would draw a curve that does not pass through them.
acpl_range = np.linspace(player_acpl_df["acpl_multi"].min(), player_acpl_df["acpl_multi"].max(), 200)
elo_curve = personal_elo_multi_engine(acpl_range)

# Per-game estimates as points, with the fitted model drawn over them
plt.figure(figsize=(8, 5))
plt.scatter(player_acpl_df["acpl_multi"], player_acpl_df["elo_personal_multi"], alpha=0.3, label="Games")
plt.plot(acpl_range, elo_curve, linewidth=2, label="Fitted curve")
plt.xlabel("ACPL")
plt.ylabel("Personal Elo Estimate")
plt.title(f"Multi-Engine Elo model (Anchor = {ANCHOR_ENGINE})")
plt.legend()

# Export the chart (before plt.show(), while the figure is still current)
export_chart("multi-engine-model", ANCHOR_ENGINE, "png")

plt.show()

In [None]:
# Merge in the single-engine ratings. The inner join keeps only games that have an
# anchor-engine rating; each row of player_acpl_df for such a game gains the
# game's single-engine Elo.
compare_df = player_acpl_df.merge(
    anchor_df[["game_id", "elo_personal_single"]],
    on="game_id",
    how="inner"
)

display(compare_df)

plt.figure(figsize=(8, 5))

plt.scatter(compare_df["elo_personal_single"],
            compare_df["elo_personal_multi"],
            alpha=0.4)

plt.xlabel("Single-Engine Elo")
plt.ylabel("Multi-Engine Elo")
plt.title("Single Engine vs Multi-Engine Model Comparison")

# Draw the X = Y reference line through the model's floor and ceiling, taken from
# the configuration so the line stays correct when R_MIN / R_MAX change
plt.axline((R_MIN, R_MIN), (R_MAX, R_MAX), linestyle="--")

# Export the chart (before plt.show(), while the figure is still current)
export_chart("model-comparison", "", "png")

plt.show()