Commit
reformatted the code using black
sandeepvshenoy committed Mar 21, 2024
1 parent c59ceee commit 7d4af19
Showing 5 changed files with 137 additions and 62 deletions.
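
The commit message indicates these changes were produced with the black code formatter. The exact invocation is not recorded in the commit, but running black over the package and test directories, for example "pip install black" followed by "black src/nashpy tests", would produce equivalent formatting-only changes: black normalises quote style, line wrapping, and blank lines without altering runtime behaviour, which matches the formatting-only hunks shown below.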
38 changes: 21 additions & 17 deletions src/nashpy/algorithms/regret_minimization.py
@@ -2,41 +2,45 @@
from typing import Generator, Tuple, Any
import numpy.typing as npt


def compute_regrets(strategy_utilities, current_strategy):
'''
This function calculates the regrets for a player based on the strategy utilities and the current strategy.
Regrets represent the difference between the utility achieved by playing a strategy and the maximum utility
that could have been achieved by playing any strategy.
"""
This function calculates the regrets for a player based on the strategy utilities and the current strategy.
Regrets represent the difference between the utility achieved by playing a strategy and the maximum utility
that could have been achieved by playing any strategy.
In this implementation, only positive regrets are considered
'''
"""
regrets = np.maximum(0, strategy_utilities - current_strategy)
return regrets


def update_strategy(current_strategy, regrets, learning_rate):
'''
"""
This function updates the player's strategy based on the regrets, the current strategy, and a fixed learning rate.
It scales the regrets by the learning rate and adds them to the current strategy.
It scales the regrets by the learning rate and adds them to the current strategy.
Finally, it normalizes the updated strategy to ensure that the probabilities sum up to 1.
'''
"""
updated_strategy = current_strategy + learning_rate * regrets
return updated_strategy / np.sum(updated_strategy)


def generate_abs_strategy(strategy_list):
'''
"""
This function will return the most favorable strategy for a player based on the max probability value
'''
"""
max_probability = max(strategy_list)
strategy_relative = [1 if x == max_probability else 0 for x in strategy_list]
sum_value_in_the_list = sum(strategy_relative)
favorable_strategy = [ x / sum_value_in_the_list for x in strategy_relative]
favorable_strategy = [x / sum_value_in_the_list for x in strategy_relative]
return favorable_strategy


def regret_minimization(
A: npt.NDArray, B: npt.NDArray, learning_rate = 0.1, max_iterations=100
A: npt.NDArray, B: npt.NDArray, learning_rate=0.1, max_iterations=100
) -> Generator[Tuple[float, float], Any, None]:
"""
Obtain the Nash equilibria using the regret minimization method over N iterations.
The code provided is based on the concept of regret matching,
The code provided is based on the concept of regret matching,
with the fixed learning rate.
Algorithm implemented here is Algorithm 4.3 Theorem 4.4 of [Nisan2007]_
@@ -52,12 +56,12 @@ def regret_minimization(
learning_rate: float ( Optional Defaulted to 0.1 )
The learning_rate determines the magnitude of the update towards the regrets
The learning rate scales the regrets before they are added to the current strategy.
A higher learning rate results in a larger update, while a lower learning rate leads to a smaller update.
The learning rate scales the regrets before they are added to the current strategy.
A higher learning rate results in a larger update, while a lower learning rate leads to a smaller update.
This value allows you to control the pace towards a Nash equilibrium.
max_iterations: Integer ( Optional Defaulted to 100 )
This value is defaulted to 100 iterations; this number could be modified to a larger or smaller number based on the utilities/payoff matrix shape
This value is defaulted to 100 iterations; this number could be modified to a larger or smaller number based on the utilities/payoff matrix shape
Yields
@@ -82,4 +86,4 @@
strategy_A_final = generate_abs_strategy(strategy_A)
strategy_B_final = generate_abs_strategy(strategy_B)

yield strategy_A_final, strategy_B_final
yield strategy_A_final, strategy_B_final
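
For context on the function reformatted above, a minimal usage sketch follows. It relies only on what this diff shows, the module path src/nashpy/algorithms/regret_minimization.py and the generator signature regret_minimization(A, B, learning_rate=0.1, max_iterations=100); the payoff matrices are illustrative, not taken from the repository.

import numpy as np

from nashpy.algorithms.regret_minimization import regret_minimization

# Illustrative zero-sum game (matching pennies); B is the negation of A.
A = np.array([[1, -1], [-1, 1]])
B = -A

# The function is a generator that yields a (strategy_A, strategy_B) pair.
strategy_A, strategy_B = next(
    regret_minimization(A, B, learning_rate=0.1, max_iterations=100)
)
print(strategy_A, strategy_B)
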
37 changes: 27 additions & 10 deletions src/nashpy/egt/imitation_dynamics.py
@@ -2,8 +2,9 @@
from typing import Generator, Tuple, Any
import numpy.typing as npt


def payoff(player_strategy, opponent_strategy, player_payoff_matrix):
'''
"""
Calculate the payoff of a player given their strategy and the opponent's strategy.
Parameters:
@@ -13,13 +14,19 @@ def payoff(player_strategy, opponent_strategy, player_payoff_matrix):
Returns:
- payoff: scalar representing the payoff of the player
'''
"""
return np.dot(player_strategy, np.dot(player_payoff_matrix, opponent_strategy))


def imitation_dynamics(
A: npt.NDArray, B: npt.NDArray, population_size=100, num_of_generations=1000, random_seed=None,threshold=0.5
) -> Generator[Tuple[float, float], Any, None]:
'''
A: npt.NDArray,
B: npt.NDArray,
population_size=100,
num_of_generations=1000,
random_seed=None,
threshold=0.5,
) -> Generator[Tuple[float, float], Any, None]:
"""
Simulate the imitation dynamics for a given game represented by payoff matrices A and B.
Parameters:
@@ -33,21 +40,31 @@
Yields:
- nash_equilibrium_player1: numpy array representing the Nash equilibrium strategy for Player 1
- nash_equilibrium_player2: numpy array representing the Nash equilibrium strategy for Player 2
'''
"""

num_strategies = len(A)

# Initialize population
if random_seed:
np.random.seed(random_seed) # Set random seed for reproducibility
np.random.seed(random_seed) # Set random seed for reproducibility

population_A = np.random.dirichlet(np.ones(num_strategies), size=population_size)
population_B = np.random.dirichlet(np.ones(num_strategies), size=population_size)

for generation in range(num_of_generations):
# Play the game
payoffs_A = np.array([payoff(population_A[i], population_B[i], A) for i in range(population_size)])
payoffs_B = np.array([payoff(population_B[i], population_A[i], B) for i in range(population_size)])
payoffs_A = np.array(
[
payoff(population_A[i], population_B[i], A)
for i in range(population_size)
]
)
payoffs_B = np.array(
[
payoff(population_B[i], population_A[i], B)
for i in range(population_size)
]
)

# Update population based on payoffs
# Used Imitation dynamics in which the players copy the strategy of the most successful individual
@@ -66,4 +83,4 @@
nash_equilibrium_B[nash_equilibrium_B >= threshold] = 1
nash_equilibrium_B[nash_equilibrium_B < threshold] = 0

yield nash_equilibrium_A, nash_equilibrium_B
yield nash_equilibrium_A, nash_equilibrium_B
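
As a reference point for the reformatted function above, here is a minimal usage sketch under the same assumptions: the module path src/nashpy/egt/imitation_dynamics.py and the signature shown in this diff. The payoff matrices are illustrative.

import numpy as np

from nashpy.egt.imitation_dynamics import imitation_dynamics

A = np.array([[3, 0], [1, 3]])  # illustrative payoff matrix for Player 1
B = np.array([[0, 1], [3, 0]])  # illustrative payoff matrix for Player 2

# Passing a non-zero random_seed makes the simulated evolution reproducible.
eq_A, eq_B = next(
    imitation_dynamics(A, B, population_size=100, num_of_generations=1000, random_seed=42)
)
print(eq_A, eq_B)
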
25 changes: 18 additions & 7 deletions src/nashpy/game.py
@@ -425,7 +425,7 @@ def linear_program(self):
column_strategy = linear_program(row_player_payoff_matrix=B.T)
return row_strategy, column_strategy

def regret_minimization(self,learning_rate = 0.1, max_iterations=100):
def regret_minimization(self, learning_rate=0.1, max_iterations=100):
"""
Build the best strategy probabilities for both players using the regret minimization method
Algorithm implemented here is Algorithm 4.3 Theorem 4.4 of [Nisan2007]_
@@ -435,12 +435,12 @@ def regret_minimization(self,learning_rate = 0.1, max_iterations=100):
learning_rate: float ( Optional Defaulted to 0.1 )
The learning_rate determines the magnitude of the update towards the regrets
The learning rate scales the regrets before they are added to the current strategy.
A higher learning rate results in a larger update, while a lower learning rate leads to a smaller update.
The learning rate scales the regrets before they are added to the current strategy.
A higher learning rate results in a larger update, while a lower learning rate leads to a smaller update.
This value allows you to control the pace towards a Nash equilibrium.
max_iterations: Integer ( Optional Defaulted to 100 )
This value is defaulted to 100 iterations; this number could be modified to a larger or smaller number based on the utilities/payoff matrix shape
This value is defaulted to 100 iterations; this number could be modified to a larger or smaller number based on the utilities/payoff matrix shape
Returns
-------
tuple
@@ -451,7 +451,13 @@ def regret_minimization(self,learning_rate = 0.1, max_iterations=100):
A=A, B=B, learning_rate=learning_rate, max_iterations=max_iterations
)

def imitation_dynamics(self,population_size=100, num_of_generations=1000,random_seed=None,threshold=0.5 ):
def imitation_dynamics(
self,
population_size=100,
num_of_generations=1000,
random_seed=None,
threshold=0.5,
):
"""
Simulate the imitation dynamics for a given game represented by payoff matrices A and B.
@@ -470,5 +476,10 @@ def imitation_dynamics(self,population_size=100, num_of_generations=1000,random_
"""
A, B = self.payoff_matrices
return imitation_dynamics(
A=A, B=B, population_size=population_size, num_of_generations=num_of_generations,random_seed=random_seed,threshold=threshold
)
A=A,
B=B,
population_size=population_size,
num_of_generations=num_of_generations,
random_seed=random_seed,
threshold=threshold,
)
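
The two Game methods touched above can be exercised as sketched below; this assumes nashpy's standard Game constructor and the method signatures shown in this diff, with illustrative payoff matrices.

import numpy as np
import nashpy as nash

A = np.array([[3, 0], [1, 3]])
B = np.array([[0, 1], [3, 0]])
game = nash.Game(A, B)

# Both methods delegate to the module-level functions shown earlier in this commit
# and return generators yielding strategy pairs.
print(next(game.regret_minimization(learning_rate=0.1, max_iterations=100)))
print(next(game.imitation_dynamics(population_size=100, num_of_generations=1000, random_seed=42)))
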
46 changes: 33 additions & 13 deletions tests/unit/test_imitation_dynamics.py
@@ -1,23 +1,32 @@
from nashpy.egt.imitation_dynamics import imitation_dynamics
from nashpy.egt.imitation_dynamics import imitation_dynamics
import numpy as np
import unittest
import random
class TestImitationDynamics(unittest.TestCase):


class TestImitationDynamics(unittest.TestCase):
def test_positive_payoffs(self):
A = np.array([[3, 0], [1, 3]]) # Payoff matrix for Player 1
B = np.array([[0, 1], [3, 0]]) # Payoff matrix for Player 2
nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B))
nash_equilibrium_player1, nash_equilibrium_player2 = next(
imitation_dynamics(A, B)
)
# Assert that Nash equilibrium strategies are within the expected range
self.assertTrue(np.all(nash_equilibrium_player1 >= 0))
self.assertTrue(np.all(nash_equilibrium_player1 <= 1))
self.assertTrue(np.all(nash_equilibrium_player2 >= 0))
self.assertTrue(np.all(nash_equilibrium_player2 <= 1))

def test_negative_payoffs(self):
A = np.array([[-1, 0], [0, -1]]) # Payoff matrix for Player 1 (negative payoffs)
B = np.array([[0, -1], [-1, 0]]) # Payoff matrix for Player 2 (negative payoffs)
nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B))
A = np.array(
[[-1, 0], [0, -1]]
) # Payoff matrix for Player 1 (negative payoffs)
B = np.array(
[[0, -1], [-1, 0]]
) # Payoff matrix for Player 2 (negative payoffs)
nash_equilibrium_player1, nash_equilibrium_player2 = next(
imitation_dynamics(A, B)
)
# Assert that Nash equilibrium strategies are within the expected range
self.assertTrue(np.all(nash_equilibrium_player1 >= 0))
self.assertTrue(np.all(nash_equilibrium_player1 <= 1))
@@ -35,8 +44,12 @@ def test_randomness(self):
results = []
for i in range(10): # Run 10 iterations
# Run imitation dynamics with random seed set to None (random initialization)
nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B, population_size, num_generations))
results.append((tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2))) # Convert numpy arrays to tuples
nash_equilibrium_player1, nash_equilibrium_player2 = next(
imitation_dynamics(A, B, population_size, num_generations)
)
results.append(
(tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2))
) # Convert numpy arrays to tuples
# Check if the results are different in at least one pair of iterations
self.assertTrue(len(set(results)) > 1, "Results are not randomly generated")

@@ -46,16 +59,23 @@ def test_random_seed_constant(self):
B = np.array([[0, 1], [3, 0]]) # Example payoff matrix for Player 2
population_size = 100
num_generations = 1000
random_seed = random.randrange(0, 1000) # Add a random_seed value as constant to generate same results in the evolution
random_seed = random.randrange(
0, 1000
) # Add a random_seed value as constant to generate same results in the evolution

# Run imitation dynamics multiple times and collect the results
results = []
for i in range(100):  # Run 100 iterations
# Run imitation dynamics with a constant random seed (same initialization each run)
nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B, population_size, num_generations, random_seed))
results.append((tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2))) # Convert numpy arrays to tuples
nash_equilibrium_player1, nash_equilibrium_player2 = next(
imitation_dynamics(A, B, population_size, num_generations, random_seed)
)
results.append(
(tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2))
) # Convert numpy arrays to tuples
# Check if the results are different in at least one pair of iterations
self.assertTrue(len(set(results)) == 1, "Results are randomly generated")

if __name__ == '__main__':
unittest.main()

if __name__ == "__main__":
unittest.main()
53 changes: 38 additions & 15 deletions tests/unit/test_regret_minimization.py
@@ -2,39 +2,62 @@
import numpy as np
from nashpy.algorithms.regret_minimization import regret_minimization


class TestRegretMinimization(unittest.TestCase):

def test_regret_minimization_for_zerosum_game(self):
# Test case values
A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]]) # Example payoff matrix for Player A
A = np.array(
[[0, -1, 1], [1, 0, -1], [-1, 1, 0]]
) # Example payoff matrix for Player A
B = -A # Example payoff matrix for Player B ( Zero Sum Game )
learning_rate = 0.1
max_iterations = 100
expected_nash_equilibrium_A = np.array([0.33333333, 0.33333333, 0.33333333]) # Expected Nash equilibrium strategy for Player 1
expected_nash_equilibrium_B = np.array([0.33333333, 0.33333333, 0.33333333]) # Expected Nash equilibrium strategy for Player 2
expected_nash_equilibrium_A = np.array(
[0.33333333, 0.33333333, 0.33333333]
) # Expected Nash equilibrium strategy for Player 1
expected_nash_equilibrium_B = np.array(
[0.33333333, 0.33333333, 0.33333333]
) # Expected Nash equilibrium strategy for Player 2

# Execute the regret minimization algorithm
actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(regret_minimization(A, B, learning_rate, max_iterations))
actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(
regret_minimization(A, B, learning_rate, max_iterations)
)

# Assert if the actual Nash equilibrium strategies match the expected strategies
self.assertTrue(np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A))
self.assertTrue(np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B))
self.assertTrue(
np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A)
)
self.assertTrue(
np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B)
)

def test_regret_minimization_non_zerosum_game(self):
# Test case values
A = np.array([[3, -1,3], [-1, 3,6], [-1, 1, 2]])
B = np.array([[-3, 1,4], [1, -3,3], [-1, 3, 4]])
A = np.array([[3, -1, 3], [-1, 3, 6], [-1, 1, 2]])
B = np.array([[-3, 1, 4], [1, -3, 3], [-1, 3, 4]])
learning_rate = 0.1
max_iterations = 100

expected_nash_equilibrium_A = np.array([0.0, 1.0, 0.0]) # Expected Nash equilibrium strategy for Player 1
expected_nash_equilibrium_B = np.array([0.0, 0.0, 1.0]) # Expected Nash equilibrium strategy for Player 2
expected_nash_equilibrium_A = np.array(
[0.0, 1.0, 0.0]
) # Expected Nash equilibrium strategy for Player 1
expected_nash_equilibrium_B = np.array(
[0.0, 0.0, 1.0]
) # Expected Nash equilibrium strategy for Player 2
# Execute the regret minimization algorithm
actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(regret_minimization(A, B, learning_rate, max_iterations))
actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(
regret_minimization(A, B, learning_rate, max_iterations)
)

# Assert if the actual Nash equilibrium strategies match the expected strategies
self.assertTrue(np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A))
self.assertTrue(np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B))
self.assertTrue(
np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A)
)
self.assertTrue(
np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B)
)


if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
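
The reformatted test modules are plain unittest test cases, so they can be run from the repository root with the standard library runner, for example "python -m unittest tests.unit.test_regret_minimization tests.unit.test_imitation_dynamics" (assuming the package and test directories are importable); pytest would also collect them unchanged.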
