In [95]:
import pandas as pd
import numpy as np
from scipy.stats import entropy
from tqdm import tqdm

In [96]:
# Raw table, one row per deck; kept around because later cells read other columns from it.
df = pd.read_csv('decks.csv')

# Keep only the block of matchup cells: the first row, the last two rows, the
# first five columns and the last two columns are dropped.
# NOTE(review): presumably those are header/summary rows and metadata columns - confirm against decks.csv.
matches = df.iloc[1:-2, 5:-2]

# Payoff matrix as floats, shifted down by 2.
# NOTE(review): the offset presumably re-centres the scores so an even matchup is worth 0 - confirm.
utils = matches.astype(float).to_numpy() - 2

In [97]:
def rnad_replicator_step(x, y, M, pi_reg_row, pi_reg_col, eta=0.2, dt=0.02, eps=1e-12):
    """Advance both players by one multiplicative step of the regularized replicator dynamics.

    The row player maximizes ``x @ M @ y`` and the column player minimizes it.
    Each player's payoff vector is penalized by ``eta`` times the log-ratio
    between its current strategy and its regularization policy before the
    replicator update is applied.

    Parameters
    ----------
    x, y : array_like
        Current row / column mixed strategies (lengths ``M.shape[0]`` and
        ``M.shape[1]``). They are clipped to ``eps`` and renormalized on entry;
        the caller's arrays are not modified.
    M : ndarray, shape (n, m)
        Payoff matrix from the row player's point of view.
    pi_reg_row, pi_reg_col : array_like
        Regularization policies for the row / column player, clipped and
        renormalized the same way as ``x`` and ``y``.
    eta : float
        Weight of the log-ratio penalty.
    dt : float
        Step size of the multiplicative Euler update.
    eps : float
        Floor applied to every probability so the logarithms stay finite.

    Returns
    -------
    tuple of ndarray
        The updated ``(x, y)``, each summing to 1.
    """
    # Project every distribution onto the (strictly positive) simplex.
    # np.clip returns a new array, so the in-place division is safe.
    x = np.clip(x, eps, None)
    x /= x.sum()
    y = np.clip(y, eps, None)
    y /= y.sum()
    pi_reg_row = np.clip(pi_reg_row, eps, None)
    pi_reg_row /= pi_reg_row.sum()
    pi_reg_col = np.clip(pi_reg_col, eps, None)
    pi_reg_col /= pi_reg_col.sum()

    # Payoffs (row maximizes, column minimizes)
    q_row = M @ y                  # shape (n,)
    q_col = - M.T @ x              # shape (m,)

    # Regularized fitness
    f_row = q_row - eta * (np.log(x + eps) - np.log(pi_reg_row + eps))
    f_col = q_col - eta * (np.log(y + eps) - np.log(pi_reg_col + eps))

    # Replicator RHS: fitness relative to the population average
    u_row = f_row - x @ f_row
    u_col = f_col - y @ f_col

    # Multiplicative Euler step. The exponent is shifted by its maximum first:
    # a constant shift cancels in the renormalization, but it keeps np.exp from
    # overflowing to inf (and the result from becoming nan) when payoffs or dt
    # are large.
    g_row = dt * u_row
    g_row -= g_row.max()
    g_col = dt * u_col
    g_col -= g_col.max()

    x = x * np.exp(g_row)
    x /= x.sum()
    y = y * np.exp(g_col)
    y /= y.sum()
    return x, y


In [113]:

# ---- Experiment settings -------------------------------------------------
num_nashes = 1000        # independent restarts, each with a fresh random regularization policy
nash_solutions = []

threshold = 1e-8         # stop once neither regularization policy moves more than this (L2 norm)
eta = 0.2                # regularization weight forwarded to rnad_replicator_step
max_iters = 1000 * 10    # safety cap on outer loops
inner_steps = 1000       # replicator steps between two refreshes of the regularization policy
dt = 0.1                 # step size forwarded to rnad_replicator_step
seed = 0                 # fixed so that Restart & Run All reproduces the same restarts
M = utils

# Dedicated generator: the restarts no longer depend on hidden global RNG state.
rng = np.random.default_rng(seed)

# Both players start from the same regularization policy below, which only
# makes sense when rows and columns index the same set of decks.
if M.shape[0] != M.shape[1]:
    raise ValueError("payoff matrix must be square, got shape %s" % (M.shape,))

# Deck names for the rows kept in M, taken from column index 1 (the second column).
# NOTE(review): confirm that this column really holds the deck name.
deck_names = df.iloc[1:-2, 1].reset_index(drop=True)

for restart_idx in tqdm(range(num_nashes)):
    # Strategies start uniform; only the regularization policy is randomized.
    x = np.ones(M.shape[0]); x /= x.sum()
    y = np.ones(M.shape[1]); y /= y.sum()
    pi_reg_row = rng.random(M.shape[0]); pi_reg_row /= pi_reg_row.sum()
    pi_reg_col = pi_reg_row.copy()

    diff = np.inf
    k = 0
    while diff > threshold and k < max_iters:
        # Inner loop: run the dynamics against the current regularization policy.
        for _ in range(inner_steps):
            x, y = rnad_replicator_step(x, y, M, pi_reg_row, pi_reg_col, eta=eta, dt=dt)

        # Outer loop: the strategies reached become the next regularization
        # policy; `diff` measures how far that policy moved.
        prev_pi_reg_row, prev_pi_reg_col = pi_reg_row, pi_reg_col
        pi_reg_row = x.copy()
        pi_reg_col = y.copy()
        diff = max(np.linalg.norm(pi_reg_row - prev_pi_reg_row),
                   np.linalg.norm(pi_reg_col - prev_pi_reg_col))
        k += 1

    entropy_row = entropy(pi_reg_row)
    nash_solutions.append({
        "row": pi_reg_row.copy(),
        "col": pi_reg_col.copy(),
        "entropy_row": entropy_row,
        "iters": k,
        "converged": bool(diff <= threshold)
    })

# Pick the highest-entropy row strategy, but only among runs that actually
# converged: a run stopped by the iteration cap is not a fixed point and must
# not win just because it is still spread out. If nothing converged, fall back
# to all runs so the report below still shows the best available candidate
# (its "converged" flag will then read False).
converged_solutions = [s for s in nash_solutions if s["converged"]]
candidates = converged_solutions if converged_solutions else nash_solutions
best_nash = max(candidates, key=lambda v: v["entropy_row"])

# Prepare a nice output table for the support and its weight
row_probs = best_nash["row"]
support_mask = row_probs > 1e-6   # weights below this are treated as outside the support
support_names = deck_names[support_mask]
support_weights = row_probs[support_mask]

output_table = pd.DataFrame({
    "Deck": support_names.values,
    "Weight": support_weights
}).sort_values("Weight", ascending=False).reset_index(drop=True)

print("Best Nash equilibrium support (row player):")
print(output_table.to_string(index=False, float_format="%.4f"))

print("Entropy:", best_nash["entropy_row"])
print("Converged in", best_nash["iters"], "outer loops:", best_nash["converged"])
# Total weight on three fixed index ranges of the row strategy.
# NOTE(review): the boundaries 12 and 16 presumably separate groups of decks in decks.csv - confirm.
print("Row sums: 0:12=%.3f, 12:16=%.3f, 16:=%0.3f" % (
    best_nash["row"][0:12].sum(),
    best_nash["row"][12:16].sum(),
    best_nash["row"][16:].sum()))


100%|██████████| 1000/1000 [10:46<00:00,  1.55it/s]

Best Nash equilibrium support (row player):
                      Deck  Weight
   Chancellor of the Annex  0.2967
      Koma, Cosmos Serpent  0.2147
           Thassa's Oracle  0.1605
           Absolute Virtue  0.1370
Jace, Wielder of Mysteries  0.1363
Herald of the eternal dawn  0.0549
Entropy: 1.6876094914508124
Converged in 63 outer loops: True
Row sums: 0:12=0.297, 12:16=0.352, 16:=0.352





In [114]:
# Exploitability check for the row side of the selected strategy pair.
row_strategy, col_strategy = best_nash["row"], best_nash["col"]

# Value the row player obtains when both sides play the selected mixes.
expected_value = row_strategy @ M @ col_strategy

# Payoff of every pure row action against the column mix; the largest entry
# is what a best-responding row player would get.
pure_row_payoffs = M @ col_strategy
best_response_payoff = pure_row_payoffs.max()

# How much the row player could gain by switching to that best response.
regret = best_response_payoff - expected_value

print("The best response payoff is", best_response_payoff)
print("Regret of best Nash equilibrium (row player):", regret)
print("Is the row and column strategy the same?", np.allclose(row_strategy, col_strategy))

The best response payoff is 6.169057055261138e-09
Regret of best Nash equilibrium (row player): 6.1690570272841584e-09
Is the row and column strategy the same? True
