In [95]:
import pandas as pd
import numpy as np
from scipy.stats import entropy
from tqdm import tqdm

In [96]:
# Load the raw deck table from disk.
df = pd.read_csv('decks.csv')

# Keep only the interior window of the table: drop the first row and the last
# two rows, and drop the first five and the last two columns.
# NOTE(review): presumably the trimmed rows/columns hold labels or summary
# statistics rather than matchup results -- confirm against decks.csv.
kept_rows, kept_cols = slice(1, -2), slice(5, -2)
matches = df.iloc[kept_rows, kept_cols]

# Float payoff matrix with every entry shifted down by 2.
# NOTE(review): the shift presumably re-centres the scores so that an even
# matchup maps to 0 -- confirm.
utils = matches.astype(float).to_numpy() - 2

In [97]:
def rnad_replicator_step(x, y, M, pi_reg_row, pi_reg_col, eta=0.2, dt=0.02, eps=1e-12):
    """Advance both players by one step of regularized replicator dynamics.

    The row player maximizes ``x @ M @ y`` and the column player minimizes it.
    Each player's fitness is its payoff vector minus ``eta`` times the
    log-ratio between its current strategy and its regularization strategy.
    The strategies are then updated multiplicatively (an exponentiated Euler
    step of size ``dt``) and renormalized.

    Parameters
    ----------
    x, y : array-like
        Current row / column strategies. They are clipped below at ``eps``
        and renormalized to sum to 1; the caller's arrays are not modified.
    M : ndarray
        Payoff matrix, used as ``M @ y`` and ``M.T @ x``.
    pi_reg_row, pi_reg_col : array-like
        Regularization strategies for the row / column player; clipped and
        renormalized the same way as ``x`` and ``y``.
    eta : float
        Weight of the log-ratio regularization term.
    dt : float
        Step size of the multiplicative update.
    eps : float
        Lower clip bound and additive guard inside the logarithms.

    Returns
    -------
    tuple of ndarray
        The updated ``(x, y)``, each summing to 1.
    """
    # Normalize inputs. np.clip returns new arrays, so the in-place divisions
    # below never touch the caller's data.
    x = np.clip(x, eps, None)
    x /= x.sum()
    y = np.clip(y, eps, None)
    y /= y.sum()
    pi_reg_row = np.clip(pi_reg_row, eps, None)
    pi_reg_row /= pi_reg_row.sum()
    pi_reg_col = np.clip(pi_reg_col, eps, None)
    pi_reg_col /= pi_reg_col.sum()

    # Payoffs (row maximizes, column minimizes)
    q_row = M @ y                  # shape (n,)
    q_col = -(M.T @ x)             # shape (m,); negate the vector, not the matrix

    # Regularized fitness
    f_row = q_row - eta * (np.log(x + eps) - np.log(pi_reg_row + eps))
    f_col = q_col - eta * (np.log(y + eps) - np.log(pi_reg_col + eps))

    # Replicator RHS
    u_row = f_row - x @ f_row
    u_col = f_col - y @ f_col

    # Multiplicative Euler step. The exponent is shifted by its maximum before
    # exponentiating: the shift cancels in the renormalization, but it keeps
    # np.exp from overflowing to inf (which would turn the result into NaN)
    # when payoffs or dt are large.
    step_row = dt * u_row
    step_row -= step_row.max()
    x = x * np.exp(step_row)
    x /= x.sum()

    step_col = dt * u_col
    step_col -= step_col.max()
    y = y * np.exp(step_col)
    y /= y.sum()
    return x, y


In [98]:

# --- Configuration -----------------------------------------------------------
num_nashes = 1000        # number of independent random restarts
nash_solutions = []

threshold = 1e-5         # outer loop stops once the regularizers move less than this
eta = 0.2                # regularization weight passed to rnad_replicator_step
dt = 0.1                 # step size passed to rnad_replicator_step
inner_steps = 1000       # replicator steps run before the regularizers are refreshed
max_iters = 1000 * 10  # safety cap
M = utils

# Seeded local generator so that a fresh "Restart & Run All" reproduces the
# same random regularization policies (and therefore the same result).
seed = 0
rng = np.random.default_rng(seed)

n_rows, n_cols = M.shape

# --- Random restarts ---------------------------------------------------------
for i in tqdm(range(num_nashes)):
    # init x, y uniform, but pi_reg with new random values
    x = np.ones(n_rows); x /= x.sum()
    y = np.ones(n_cols); y /= y.sum()
    pi_reg_row = rng.random(n_rows); pi_reg_row /= pi_reg_row.sum()
    if n_cols == n_rows:
        # Square game: both players start from the same regularizer.
        pi_reg_col = pi_reg_row.copy()
    else:
        # Non-square game: the row regularizer has the wrong length for the
        # column player, so draw an independent one.
        pi_reg_col = rng.random(n_cols); pi_reg_col /= pi_reg_col.sum()

    diff = np.inf
    k = 0
    while diff > threshold and k < max_iters:
        # Run the regularized dynamics, then move the regularizers to where
        # the strategies ended up.
        for _ in range(inner_steps):
            x, y = rnad_replicator_step(x, y, M, pi_reg_row, pi_reg_col, eta=eta, dt=dt)
        prev_pi_reg_row = pi_reg_row.copy()
        prev_pi_reg_col = pi_reg_col.copy()
        pi_reg_row = x.copy()
        pi_reg_col = y.copy()
        # Convergence measure: largest Euclidean move of either regularizer.
        diff = max(np.linalg.norm(pi_reg_row - prev_pi_reg_row), np.linalg.norm(pi_reg_col - prev_pi_reg_col))
        k += 1

    entropy_row = entropy(pi_reg_row)
    nash_solutions.append({
        "row": pi_reg_row.copy(),
        "col": pi_reg_col.copy(),
        "entropy_row": entropy_row,
        "iters": k,
        "converged": bool(diff <= threshold)
    })

# --- Selection ---------------------------------------------------------------
# Pick the Nash equilibrium with the highest entropy in row player.
# Only converged runs are considered: a run that merely hit the iteration cap
# is not an equilibrium and must not win on entropy alone. If nothing
# converged, fall back to all runs and say so.
converged_solutions = [s for s in nash_solutions if s["converged"]]
if not converged_solutions:
    print("Warning: no run converged within max_iters; selecting among unconverged iterates.")
best_nash = max(converged_solutions or nash_solutions, key=lambda v: v["entropy_row"])
print("Best Nash found (row):", best_nash["row"].round(3))
print("Best Nash found (col):", best_nash["col"].round(3))
print("Entropy:", best_nash["entropy_row"])
print("Converged in", best_nash["iters"], "outer loops:", best_nash["converged"])
# Probability mass per index range of the row strategy.
# NOTE(review): the boundaries 12 and 16 presumably separate groups of decks
# in decks.csv -- confirm and consider naming them.
print("Row sums: 0:12=%.3f, 12:16=%.3f, 16:=%0.3f" % (
    best_nash["row"][0:12].sum(),
    best_nash["row"][12:16].sum(),
    best_nash["row"][16:].sum()))


100%|██████████| 1000/1000 [06:13<00:00,  2.67it/s]

Best Nash found (row): [0.139 0.145 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.283 0.075 0.    0.    0.248 0.11  0.    0.    0.    0.    0.    0.
 0.   ]
Best Nash found (col): [0.139 0.145 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
 0.283 0.075 0.    0.    0.248 0.11  0.    0.    0.    0.    0.    0.
 0.   ]
Entropy: 1.6942168823844987
Converged in 13 outer loops: True
Row sums: 0:12=0.283, 12:16=0.358, 16:=0.358





In [102]:
# Value of the game for the row player when both sides play best_nash.
row_strategy, col_strategy = best_nash["row"], best_nash["col"]
expected_value = row_strategy @ M @ col_strategy

# Payoff of every pure row action against the column strategy; the largest
# entry is what a best-responding row player would obtain.
payoff_per_row_action = M @ col_strategy
best_response_payoff = payoff_per_row_action.max()

# Regret: how much the row player gains by deviating to that best response.
regret = best_response_payoff - expected_value

for label, value in (
    ("The best response payoff is", best_response_payoff),
    ("Regret of best Nash equilibrium (row player):", regret),
    ("Is the row and column strategy the same?", np.allclose(row_strategy, col_strategy)),
):
    print(label, value)

The best response payoff is 2.7288669596844597e-06
Regret of best Nash equilibrium (row player): 2.7288669596979204e-06
Is the row and column strategy the same? True
