In [2]:
from copy import deepcopy

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the match data: one row per game, described from the point of view of
# "Player". The stored Win/Draw/Loss columns are dropped here and re-derived
# from "Score" at the end of this cell.
df = pd.read_csv("chess_championships.csv").drop(["Win", "Draw", "Loss"], axis=1)

# Build the mirror image of every game: the same game described from the
# opponent's side, so each game contributes two rows to the training data.
df_2 = df.copy()

df_2["Player"] = df["Opponent"]
df_2["Opponent"] = df["Player"]

df_2["Rating"] = df["Opponent rating"]
df_2["Opponent rating"] = df["Rating"]

df_2["Won so far"] = df["Lost so far"]
df_2["Lost so far"] = df["Won so far"]

# Flip the side-dependent columns with 1 - value ("Score" 0.5 stays 0.5).
df_2["White"] = 1 - df_2["White"]
df_2["Is challenger"] = 1 - df_2["Is challenger"]
df_2["Score"] = 1 - df_2["Score"]

# ignore_index=True gives the stacked frame a unique 0..2n-1 index. Without it
# every label appears twice, which makes label-based access and any later
# index-aligned assignment ambiguous.
df = pd.concat((df, df_2), ignore_index=True)

df["Abs rating diff"] = (df["Rating"] - df["Opponent rating"]).abs()
# Computed from the stacked frame itself rather than from df_2, so the result
# does not depend on index alignment between frames of different length.
df["Played so far"] = df["Won so far"] + df["Drawn so far"] + df["Lost so far"]

df["Score diff"] = df["Won so far"] - df["Lost so far"]
df["abs Score diff"] = df["Score diff"].abs()

df["Rating diff"] = df["Rating"] - df["Opponent rating"]

# 0/1 outcome indicators rebuilt from "Score" (1 = win, 0.5 = draw, 0 = loss).
df["Win"] = 1 * (df["Score"] == 1)
df["Draw"] = 1 * (df["Score"] == 0.5)
df["Loss"] = 1 * (df["Score"] == 0)

In [4]:
# Model inputs: both ratings, the colour flag and the running match score.
feature_columns = ["Rating", "Opponent rating", "White", "Won so far", "Lost so far"]
X = df.loc[:, feature_columns]

# Target class derived from the game score: 1 -> 0, 0.5 -> 1, 0 -> 2.
Y = 2 - 2 * df["Score"]

# Standardise the features; the fitted scaler is reused for later predictions.
scaler = StandardScaler()
scaled_X = scaler.fit(X).transform(X)

In [5]:
# Three-class logistic regression on the standardised features; Y takes the
# values 0, 1 and 2 (see Y = 2 - 2 * Score).
model = LogisticRegression(random_state=0)
model = model.fit(scaled_X, Y)

In [6]:
# Fitted coefficients, transposed so that each row is one input feature (in
# the column order of X) and each column is one target class.
np.transpose(model.coef_)

array([[ 2.33522393e-01,  9.63127844e-02, -3.29835177e-01],
       [-3.29835177e-01,  9.63127844e-02,  2.33522393e-01],
       [ 3.72619534e-01,  5.34476079e-20, -3.72619534e-01],
       [-1.43627243e-01,  8.98230181e-02,  5.38042252e-02],
       [ 5.38042252e-02,  8.98230181e-02, -1.43627243e-01]])

In [7]:
# In-sample class probabilities for every row of the training matrix.
# NOTE(review): despite the name this holds one column per target class, not
# only the win probability, and it is not referenced again in the cells
# visible here.
predicted_win = model.predict_proba(scaled_X)

In [8]:
# Scratch draw of a single uniform sample. The value is not stored, but the
# call advances NumPy's global random state, so it changes what any later
# np.random.* call returns.
np.random.rand()

0.6618897418641457

In [9]:
NUM_SIMULATIONS = 10_000

# Scenario being simulated, from the player's point of view.
PLAYER_RATING = 2728
OPPONENT_RATING = 2783
START_WINS = 1
START_LOSSES = 2
GAMES_REMAINING = 3

# Fixed seed so that re-running this cell reproduces the same shares below.
SIMULATION_SEED = 0

# Possible game scores, in the same order as the columns of
# model.predict_proba: classes 0, 1, 2 of Y = 2 - 2 * Score, i.e. score 1,
# score 0.5, score 0.
OUTCOMES = np.array([1, 0.5, 0])

rng = np.random.default_rng(SIMULATION_SEED)

# Cache of outcome probabilities keyed by (white, wins, losses). Only a handful
# of distinct states can occur, so the model is queried once per state instead
# of once per simulated game.
outcome_probs = {}


def game_probabilities(white, wins, losses):
    """Return the model's outcome probabilities for the next game.

    Parameters
    ----------
    white : int
        1 if the player has White in this game, 0 otherwise.
    wins, losses : int
        Games won and lost by the player so far in the match.

    Returns
    -------
    numpy.ndarray
        Probabilities ordered like ``OUTCOMES``.
    """
    state = (white, wins, losses)
    if state not in outcome_probs:
        # A one-row frame with the training column names keeps the scaler from
        # warning about missing feature names.
        features = pd.DataFrame(
            [[PLAYER_RATING, OPPONENT_RATING, white, wins, losses]],
            columns=X.columns,
        )
        outcome_probs[state] = model.predict_proba(scaler.transform(features))[0]
    return outcome_probs[state]


# Final score margin (wins - losses) of every simulated match.
record = []

for i in range(NUM_SIMULATIONS):
    wins = START_WINS
    losses = START_LOSSES

    for j in range(GAMES_REMAINING):
        # Colours alternate game by game, starting with White for the player.
        white = (j + 1) % 2
        result = rng.choice(OUTCOMES, p=game_probabilities(white, wins, losses))

        # A draw (0.5) leaves both counters unchanged.
        if result == 1:
            wins += 1
        elif result == 0:
            losses += 1

    record.append(wins - losses)



In [10]:
# Share of simulated matches that end with more wins than losses.
(np.asarray(record) > 0).sum() / NUM_SIMULATIONS

np.float64(0.0391)

In [11]:
# Share of simulated matches that end with more losses than wins.
(np.asarray(record) < 0).sum() / NUM_SIMULATIONS

np.float64(0.7489)

In [12]:
# Share of simulated matches that end with wins and losses level.
(np.asarray(record) == 0).sum() / NUM_SIMULATIONS

np.float64(0.212)

In [13]:
# Share of matches finished ahead, plus half of the share finished level.
margins = np.asarray(record)
(margins > 0).sum() / NUM_SIMULATIONS + ((margins == 0).sum() / NUM_SIMULATIONS) / 2

np.float64(0.1451)

In [16]:
# Outcome probabilities for one game: rating 2728 against 2783, playing White,
# with 1 game won and 2 lost so far (feature order as in X).
model.predict_proba(scaler.transform(np.array([[2728, 2783, 1, 1, 2]])))



array([[0.17620385, 0.69670537, 0.12709077]])

In [15]:
# Outcome probabilities for one game: rating 2783 against 2728, playing White,
# with 0 games won and 1 lost so far (feature order as in X).
model.predict_proba(scaler.transform(np.array([[2783, 2728, 1, 0, 1]])))



array([[0.31287774, 0.6105119 , 0.07661036]])