In [1]:
import numpy as np
import pandas as pd 
import sys
import os
from nash_equilibrium.nash_solver import milp_max_sym_ent_2p, replicator_dynamics_nash

Matplotlib created a temporary cache directory at /var/folders/fh/fwc37qhn04d8sxp65hwv1kxm0000gn/T/matplotlib-y_kd3p5r because the default path (/Users/gabesmithline/.matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
# Load the agent-vs-agent performance table. The path is relative to the
# notebook's working directory; the first CSV column holds the row labels.
PERFORMANCE_MATRIX_CSV = 'meta_game_analysis/game_matrix_2_100_bootstrap/csv/performance_matrix.csv'
performance_matrix = pd.read_csv(PERFORMANCE_MATRIX_CSV, index_col=0)

# Later cells pair `agents` (the row labels) with strategy vectors and evaluate
# strategy @ payoff_matrix @ strategy, which is only meaningful when the table
# is square and its columns follow the same agent order as its rows.
if list(performance_matrix.index) != list(performance_matrix.columns):
    raise ValueError(
        "performance matrix must be square with identical row and column "
        f"labels in the same order; got shape {performance_matrix.shape}"
    )

payoff_matrix = performance_matrix.to_numpy()
agents = performance_matrix.index.tolist()

In [3]:
# Show the loaded payoff table; rows and columns are both labelled by agent name.
performance_matrix

Unnamed: 0,anthropic_3.7_sonnet_circle_5,anthropic_3.7_sonnet_circle_6,anthropic_sonnet_3.7_reasoning_circle_0,gemini_2.0_flash_circle_2,gemini_2.0_flash_circle_5,openai_4o_circle_4,openai_4o_circle_5,openai_4o_circle_6,openai_o3_mini_circle_0
anthropic_3.7_sonnet_circle_5,613.56273,590.03452,589.555949,546.29322,476.77334,573.078226,632.12814,543.666267,665.612432
anthropic_3.7_sonnet_circle_6,618.56672,556.44817,562.11134,555.23956,633.60156,475.911149,605.486238,632.35891,717.304041
anthropic_sonnet_3.7_reasoning_circle_0,545.970369,606.5906,647.51886,479.915549,561.62145,574.660863,576.151356,564.6512,617.29995
gemini_2.0_flash_circle_2,457.08842,613.25806,649.005036,640.94433,614.17415,646.852567,518.86609,508.654636,558.485826
gemini_2.0_flash_circle_5,612.92289,518.02614,582.66918,534.72743,484.46824,552.947651,652.233979,547.774674,564.946173
openai_4o_circle_4,530.045477,478.733056,514.871105,600.312367,609.980687,576.927565,636.92351,505.243795,606.505
openai_4o_circle_5,639.32744,594.107611,613.518589,547.36038,553.316103,582.83751,520.13115,542.836689,601.958421
openai_4o_circle_6,716.942933,565.43451,606.004876,507.798862,563.803126,670.261323,582.806067,596.393667,600.586724
openai_o3_mini_circle_0,630.5084,563.882441,654.33635,530.903456,633.336432,615.818063,665.244211,592.903849,624.1938


In [4]:
def compute_regret(strategy, payoff_matrix):
    """
    Compute each agent's regret against a mixed strategy.

    Every agent (row of the payoff matrix) is evaluated against an opponent
    drawn from `strategy`; its regret is how much more it earns than the
    strategy earns against itself.

    Args:
        strategy: Length-n probability vector over the agents (any array-like,
            e.g. a list or 1-D numpy array).
        payoff_matrix: n x n game payoff matrix (any array-like); row i holds
            agent i's payoffs against each column agent.

    Returns:
        regret: Length-n vector, expected_utils - nash_value. A positive entry
            means that agent does better against the strategy than the
            strategy does against itself.
        nash_value: Scalar expected utility when the strategy plays against
            itself (strategy @ payoff_matrix @ strategy).
        expected_utils: Length-n vector of each agent's expected utility
            against the strategy (payoff_matrix @ strategy).

    Raises:
        ValueError: If payoff_matrix is not a square 2-D array or strategy is
            not a 1-D vector of matching length.
    """
    # Coerce up front so plain Python lists work with the `@` operator.
    strategy = np.asarray(strategy)
    payoff_matrix = np.asarray(payoff_matrix)

    if payoff_matrix.ndim != 2 or payoff_matrix.shape[0] != payoff_matrix.shape[1]:
        raise ValueError(
            f"payoff_matrix must be a square 2-D array, got shape {payoff_matrix.shape}"
        )
    if strategy.shape != (payoff_matrix.shape[0],):
        raise ValueError(
            f"strategy must have shape ({payoff_matrix.shape[0]},), got {strategy.shape}"
        )

    # Expected utilities when playing against the strategy
    expected_utils = payoff_matrix @ strategy

    # Value of the strategy against itself; kept in the original left-to-right
    # form so floating-point results are unchanged.
    nash_value = strategy @ payoff_matrix @ strategy

    # Regret of each agent relative to that value
    regret = expected_utils - nash_value

    return regret, nash_value, expected_utils

def is_epsilon_nash(strategy, payoff_matrix, epsilon=0.05):
    """
    Test a strategy against the epsilon-Nash condition and report the result.

    The largest regret and the strategy's value against itself are printed
    to stdout before returning.

    Args:
        strategy: Strategy vector to check
        payoff_matrix: Game payoff matrix
        epsilon: Largest regret still accepted as an equilibrium

    Returns:
        tuple: (is_nash, max_regret, nash_value) where is_nash is True when
        max_regret <= epsilon, max_regret is the largest per-agent regret,
        and nash_value is the strategy's expected utility against itself.
    """
    regrets, value, _ = compute_regret(strategy, payoff_matrix)
    worst_regret = np.max(regrets)

    print(f"Maximum regret: {worst_regret:.6f}")
    print(f"Nash value: {value:.6f}")

    within_tolerance = worst_regret <= epsilon
    return within_tolerance, worst_regret, value

In [5]:
# Run both solvers on the payoff matrix. The replicator-dynamics result is
# echoed raw as soon as it is available, before the second solver starts.
print("Computing Nash equilibria...")
rd_nash = replicator_dynamics_nash(payoff_matrix, max_iter=10000, epsilon=0.05)
print(rd_nash)

me_nash = milp_max_sym_ent_2p(payoff_matrix)

# One probability table per solver, with a row for each agent.
print("\nReplicator Dynamics Nash Equilibrium:")
rd_table = pd.DataFrame({'Agent': agents, 'Probability': rd_nash})
print(rd_table)

print("\nMaximum Entropy Nash Equilibrium:")
me_table = pd.DataFrame({'Agent': agents, 'Probability': me_nash})
print(me_table)

# How close the two equilibria are (values, regrets, L1 distance) is checked
# in a later cell.


Computing Nash equilibria...


  payoff_exp = np.exp(current_step_size * expected_payoffs)
  new_strategy = new_strategy / np.sum(new_strategy)


[5.47754352e-34 1.26567770e-32 1.16299888e-61 1.00000000e+00
 6.52820284e-63 1.99213412e-49 6.75245897e-35 7.31319223e-52
 2.00901999e-42]

Replicator Dynamics Nash Equilibrium:
                                     Agent   Probability
0            anthropic_3.7_sonnet_circle_5  5.477544e-34
1            anthropic_3.7_sonnet_circle_6  1.265678e-32
2  anthropic_sonnet_3.7_reasoning_circle_0  1.162999e-61
3                gemini_2.0_flash_circle_2  1.000000e+00
4                gemini_2.0_flash_circle_5  6.528203e-63
5                       openai_4o_circle_4  1.992134e-49
6                       openai_4o_circle_5  6.752459e-35
7                       openai_4o_circle_6  7.313192e-52
8                  openai_o3_mini_circle_0  2.009020e-42

Maximum Entropy Nash Equilibrium:
                                     Agent   Probability
0            anthropic_3.7_sonnet_circle_5  1.422996e-01
1            anthropic_3.7_sonnet_circle_6  3.989840e-01
2  anthropic_sonnet_3.7_reasoning_circle_0  1.

In [6]:
# Verify each solver's strategy against the 0.05 regret tolerance.
# is_epsilon_nash prints the max regret and value itself, so each header must
# be printed before the call and each verdict after it.
print("\nChecking if RD Nash is a 0.05-Nash equilibrium:")
rd_check = is_epsilon_nash(rd_nash, payoff_matrix, 0.05)
rd_is_nash, rd_max_regret, rd_value = rd_check
print(f"RD Nash is a 0.05-Nash equilibrium: {rd_is_nash}")

print("\nChecking if ME Nash is a 0.05-Nash equilibrium:")
me_check = is_epsilon_nash(me_nash, payoff_matrix, 0.05)
me_is_nash, me_max_regret, me_value = me_check
print(f"ME Nash is a 0.05-Nash equilibrium: {me_is_nash}")

# Per-agent breakdown: expected utility against each strategy and the
# resulting regret (the middle return value, the strategy's value, is unused).
rd_regret, _, rd_expected_utils = compute_regret(rd_nash, payoff_matrix)
me_regret, _, me_expected_utils = compute_regret(me_nash, payoff_matrix)

rd_regret_table = pd.DataFrame({
    'Agent': agents,
    'Expected Utility': rd_expected_utils,
    'Regret': rd_regret,
})
print("\nRegrets for each agent under RD Nash strategy:")
print(rd_regret_table)

me_regret_table = pd.DataFrame({
    'Agent': agents,
    'Expected Utility': me_expected_utils,
    'Regret': me_regret,
})
print("\nRegrets for each agent under ME Nash strategy:")
print(me_regret_table)

# Side-by-side summary of the two equilibria.
value_gap = abs(rd_value - me_value)
max_regret_gap = abs(rd_max_regret - me_max_regret)

print("\nComparison of RD Nash and ME Nash:")
print(f"RD Nash Value: {rd_value:.6f}")
print(f"ME Nash Value: {me_value:.6f}")
print(f"RD Nash Max Regret: {rd_max_regret:.6f}")
print(f"ME Nash Max Regret: {me_max_regret:.6f}")
print(f"Difference in Nash Value: {value_gap:.6f}")
print(f"Difference in Max Regret: {max_regret_gap:.6f}")

# L1 distance: sum of absolute per-agent probability differences.
l1_distance = np.abs(rd_nash - me_nash).sum()
print(f"L1 distance between RD Nash and ME Nash: {l1_distance:.6f}")


Checking if RD Nash is a 0.05-Nash equilibrium:
Maximum regret: 0.000000
Nash value: 640.944330
RD Nash is a 0.05-Nash equilibrium: True

Checking if ME Nash is a 0.05-Nash equilibrium:
Maximum regret: 0.000000
Nash value: 603.729065
ME Nash is a 0.05-Nash equilibrium: True

Regrets for each agent under RD Nash strategy:
                                     Agent  Expected Utility      Regret
0            anthropic_3.7_sonnet_circle_5        546.293220  -94.651110
1            anthropic_3.7_sonnet_circle_6        555.239560  -85.704770
2  anthropic_sonnet_3.7_reasoning_circle_0        479.915549 -161.028781
3                gemini_2.0_flash_circle_2        640.944330    0.000000
4                gemini_2.0_flash_circle_5        534.727430 -106.216900
5                       openai_4o_circle_4        600.312367  -40.631963
6                       openai_4o_circle_5        547.360380  -93.583950
7                       openai_4o_circle_6        507.798862 -133.145468
8                  