Commit
reformatted the code using black
sandeepvshenoy committed Mar 21, 2024
1 parent c59ceee commit 7d4af19
Showing 5 changed files with 137 additions and 62 deletions.
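
The commit message indicates these changes were produced with the black code formatter. The exact invocation is not recorded in the commit, but running black over the package and test directories, for example "pip install black" followed by "black src/nashpy tests", would produce equivalent formatting-only changes: black normalises quote style, line wrapping, and blank lines without altering runtime behaviour, which matches the formatting-only hunks shown below.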
38 changes: 21 additions & 17 deletions src/nashpy/algorithms/regret_minimization.py
@@ -2,41 +2,45 @@
from typing import Generator, Tuple, Any
import numpy.typing as npt


def compute_regrets(strategy_utilities, current_strategy):
'''
This function calculates the regrets for a player based on the strategy utilities and the current strategy.
Regrets represent the difference between the utility achieved by playing a strategy and the maximum utility
that could have been achieved by playing any strategy.
"""
This function calculates the regrets for a player based on the strategy utilities and the current strategy.
Regrets represent the difference between the utility achieved by playing a strategy and the maximum utility
that could have been achieved by playing any strategy.
In this implementation, only positive regrets are considered
'''
"""
regrets = np.maximum(0, strategy_utilities - current_strategy)
return regrets


def update_strategy(current_strategy, regrets, learning_rate):
'''
"""
This function updates the player's strategy based on the regrets, the current strategy, and a fixed learning rate.
It scales the regrets by the learning rate and adds them to the current strategy.
It scales the regrets by the learning rate and adds them to the current strategy.
Finally, it normalizes the updated strategy to ensure that the probabilities sum up to 1.
'''
"""
updated_strategy = current_strategy + learning_rate * regrets
return updated_strategy / np.sum(updated_strategy)


def generate_abs_strategy(strategy_list):
'''
"""
This function will return the most favorable strategy for a player based on the max probability value
'''
"""
max_probability = max(strategy_list)
strategy_relative = [1 if x == max_probability else 0 for x in strategy_list]
sum_value_in_the_list = sum(strategy_relative)
favorable_strategy = [ x / sum_value_in_the_list for x in strategy_relative]
favorable_strategy = [x / sum_value_in_the_list for x in strategy_relative]
return favorable_strategy


def regret_minimization(
A: npt.NDArray, B: npt.NDArray, learning_rate = 0.1, max_iterations=100
A: npt.NDArray, B: npt.NDArray, learning_rate=0.1, max_iterations=100
) -> Generator[Tuple[float, float], Any, None]:
"""
Obtain the Nash equilibria using the regret minimization method over N iterations.
The code provided is based on the concept of regret matching,
The code provided is based on the concept of regret matching,
with the fixed learning rate.
Algorithm implemented here is Algorithm 4.3 Theorem 4.4 of [Nisan2007]_
@@ -52,12 +56,12 @@ def regret_minimization(
learning_rate: float ( Optional Defaulted to 0.1 )
The learning_rate determines the magnitude of the update towards the regrets
The learning rate scales the regrets before they are added to the current strategy.
A higher learning rate results in a larger update, while a lower learning rate leads to a smaller update.
The learning rate scales the regrets before they are added to the current strategy.
A higher learning rate results in a larger update, while a lower learning rate leads to a smaller update.
This value allows you to control the pace towards a Nash equilibrium.
max_iterations: Integer ( Optional Defaulted to 100 )
This value is defaulted to 100 iterations; this number could be modified to a larger or smaller number based on the utilities/payoff matrix shape
This value is defaulted to 100 iterations; this number could be modified to a larger or smaller number based on the utilities/payoff matrix shape
Yields
@@ -82,4 +86,4 @@
strategy_A_final = generate_abs_strategy(strategy_A)
strategy_B_final = generate_abs_strategy(strategy_B)

yield strategy_A_final, strategy_B_final
yield strategy_A_final, strategy_B_final
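
For context on the function reformatted above, a minimal usage sketch follows. It relies only on what this diff shows, the module path src/nashpy/algorithms/regret_minimization.py and the generator signature regret_minimization(A, B, learning_rate=0.1, max_iterations=100); the payoff matrices are illustrative, not taken from the repository.

import numpy as np

from nashpy.algorithms.regret_minimization import regret_minimization

# Illustrative zero-sum game (matching pennies); B is the negation of A.
A = np.array([[1, -1], [-1, 1]])
B = -A

# The function is a generator that yields a (strategy_A, strategy_B) pair.
strategy_A, strategy_B = next(
    regret_minimization(A, B, learning_rate=0.1, max_iterations=100)
)
print(strategy_A, strategy_B)
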
37 changes: 27 additions & 10 deletions src/nashpy/egt/imitation_dynamics.py
@@ -2,8 +2,9 @@
from typing import Generator, Tuple, Any
import numpy.typing as npt


def payoff(player_strategy, opponent_strategy, player_payoff_matrix):
'''
"""
Calculate the payoff of a player given their strategy and the opponent's strategy.
Parameters:
@@ -13,13 +14,19 @@ def payoff(player_strategy, opponent_strategy, player_payoff_matrix):
Returns:
- payoff: scalar representing the payoff of the player
'''
"""
return np.dot(player_strategy, np.dot(player_payoff_matrix, opponent_strategy))


def imitation_dynamics(
A: npt.NDArray, B: npt.NDArray, population_size=100, num_of_generations=1000, random_seed=None,threshold=0.5
) -> Generator[Tuple[float, float], Any, None]:
'''
A: npt.NDArray,
B: npt.NDArray,
population_size=100,
num_of_generations=1000,
random_seed=None,
threshold=0.5,
) -> Generator[Tuple[float, float], Any, None]:
"""
Simulate the imitation dynamics for a given game represented by payoff matrices A and B.
Parameters:
@@ -33,21 +40,31 @@
Yields:
- nash_equilibrium_player1: numpy array representing the Nash equilibrium strategy for Player 1
- nash_equilibrium_player2: numpy array representing the Nash equilibrium strategy for Player 2
'''
"""

num_strategies = len(A)

# Initialize population
if random_seed:
np.random.seed(random_seed) # Set random seed for reproducibility
np.random.seed(random_seed) # Set random seed for reproducibility

population_A = np.random.dirichlet(np.ones(num_strategies), size=population_size)
population_B = np.random.dirichlet(np.ones(num_strategies), size=population_size)

for generation in range(num_of_generations):
# Play the game
payoffs_A = np.array([payoff(population_A[i], population_B[i], A) for i in range(population_size)])
payoffs_B = np.array([payoff(population_B[i], population_A[i], B) for i in range(population_size)])
payoffs_A = np.array(
[
payoff(population_A[i], population_B[i], A)
for i in range(population_size)
]
)
payoffs_B = np.array(
[
payoff(population_B[i], population_A[i], B)
for i in range(population_size)
]
)

# Update population based on payoffs
# Used Imitation dynamics in which the players copy the strategy of the most successful individual
@@ -66,4 +83,4 @@
nash_equilibrium_B[nash_equilibrium_B >= threshold] = 1
nash_equilibrium_B[nash_equilibrium_B < threshold] = 0

yield nash_equilibrium_A, nash_equilibrium_B
yield nash_equilibrium_A, nash_equilibrium_B
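
As a reference point for the reformatted function above, here is a minimal usage sketch under the same assumptions: the module path src/nashpy/egt/imitation_dynamics.py and the signature shown in this diff. The payoff matrices are illustrative.

import numpy as np

from nashpy.egt.imitation_dynamics import imitation_dynamics

A = np.array([[3, 0], [1, 3]])  # illustrative payoff matrix for Player 1
B = np.array([[0, 1], [3, 0]])  # illustrative payoff matrix for Player 2

# Passing a non-zero random_seed makes the simulated evolution reproducible.
eq_A, eq_B = next(
    imitation_dynamics(A, B, population_size=100, num_of_generations=1000, random_seed=42)
)
print(eq_A, eq_B)
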
25 changes: 18 additions & 7 deletions src/nashpy/game.py
@@ -425,7 +425,7 @@ def linear_program(self):
column_strategy = linear_program(row_player_payoff_matrix=B.T)
return row_strategy, column_strategy

def regret_minimization(self,learning_rate = 0.1, max_iterations=100):
def regret_minimization(self, learning_rate=0.1, max_iterations=100):
"""
Build the best strategy probabilities for both players using the regret minimization method
Algorithm implemented here is Algorithm 4.3 Theorem 4.4 of [Nisan2007]_
@@ -435,12 +435,12 @@ def regret_minimization(self,learning_rate = 0.1, max_iterations=100):
learning_rate: float ( Optional Defaulted to 0.1 )
The learning_rate determines the magnitude of the update towards the regrets
The learning rate scales the regrets before they are added to the current strategy.
A higher learning rate results in a larger update, while a lower learning rate leads to a smaller update.
The learning rate scales the regrets before they are added to the current strategy.
A higher learning rate results in a larger update, while a lower learning rate leads to a smaller update.
This value allows you to control the pace towards a Nash equilibrium.
max_iterations: Integer ( Optional Defaulted to 100 )
This value is defaulted to 100 iterations; this number could be modified to a larger or smaller number based on the utilities/payoff matrix shape
This value is defaulted to 100 iterations; this number could be modified to a larger or smaller number based on the utilities/payoff matrix shape
Returns
-------
tuple
@@ -451,7 +451,13 @@ def regret_minimization(self,learning_rate = 0.1, max_iterations=100):
A=A, B=B, learning_rate=learning_rate, max_iterations=max_iterations
)

def imitation_dynamics(self,population_size=100, num_of_generations=1000,random_seed=None,threshold=0.5 ):
def imitation_dynamics(
self,
population_size=100,
num_of_generations=1000,
random_seed=None,
threshold=0.5,
):
"""
Simulate the imitation dynamics for a given game represented by payoff matrices A and B.
@@ -470,5 +476,10 @@ def imitation_dynamics(self,population_size=100, num_of_generations=1000,random_
"""
A, B = self.payoff_matrices
return imitation_dynamics(
A=A, B=B, population_size=population_size, num_of_generations=num_of_generations,random_seed=random_seed,threshold=threshold
)
A=A,
B=B,
population_size=population_size,
num_of_generations=num_of_generations,
random_seed=random_seed,
threshold=threshold,
)
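
The two Game methods touched above can be exercised as sketched below; this assumes nashpy's standard Game constructor and the method signatures shown in this diff, with illustrative payoff matrices.

import numpy as np
import nashpy as nash

A = np.array([[3, 0], [1, 3]])
B = np.array([[0, 1], [3, 0]])
game = nash.Game(A, B)

# Both methods delegate to the module-level functions shown earlier in this commit
# and return generators yielding strategy pairs.
print(next(game.regret_minimization(learning_rate=0.1, max_iterations=100)))
print(next(game.imitation_dynamics(population_size=100, num_of_generations=1000, random_seed=42)))
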
46 changes: 33 additions & 13 deletions tests/unit/test_imitation_dynamics.py
@@ -1,23 +1,32 @@
from nashpy.egt.imitation_dynamics import imitation_dynamics
from nashpy.egt.imitation_dynamics import imitation_dynamics
import numpy as np
import unittest
import random
class TestImitationDynamics(unittest.TestCase):


class TestImitationDynamics(unittest.TestCase):
def test_positive_payoffs(self):
A = np.array([[3, 0], [1, 3]]) # Payoff matrix for Player 1
B = np.array([[0, 1], [3, 0]]) # Payoff matrix for Player 2
nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B))
nash_equilibrium_player1, nash_equilibrium_player2 = next(
imitation_dynamics(A, B)
)
# Assert that Nash equilibrium strategies are within the expected range
self.assertTrue(np.all(nash_equilibrium_player1 >= 0))
self.assertTrue(np.all(nash_equilibrium_player1 <= 1))
self.assertTrue(np.all(nash_equilibrium_player2 >= 0))
self.assertTrue(np.all(nash_equilibrium_player2 <= 1))

def test_negative_payoffs(self):
A = np.array([[-1, 0], [0, -1]]) # Payoff matrix for Player 1 (negative payoffs)
B = np.array([[0, -1], [-1, 0]]) # Payoff matrix for Player 2 (negative payoffs)
nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B))
A = np.array(
[[-1, 0], [0, -1]]
) # Payoff matrix for Player 1 (negative payoffs)
B = np.array(
[[0, -1], [-1, 0]]
) # Payoff matrix for Player 2 (negative payoffs)
nash_equilibrium_player1, nash_equilibrium_player2 = next(
imitation_dynamics(A, B)
)
# Assert that Nash equilibrium strategies are within the expected range
self.assertTrue(np.all(nash_equilibrium_player1 >= 0))
self.assertTrue(np.all(nash_equilibrium_player1 <= 1))
@@ -35,8 +44,12 @@ def test_randomness(self):
results = []
for i in range(10): # Run 10 iterations
# Run imitation dynamics with random seed set to None (random initialization)
nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B, population_size, num_generations))
results.append((tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2))) # Convert numpy arrays to tuples
nash_equilibrium_player1, nash_equilibrium_player2 = next(
imitation_dynamics(A, B, population_size, num_generations)
)
results.append(
(tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2))
) # Convert numpy arrays to tuples
# Check if the results are different in at least one pair of iterations
self.assertTrue(len(set(results)) > 1, "Results are not randomly generated")

@@ -46,16 +59,23 @@ def test_random_seed_constant(self):
B = np.array([[0, 1], [3, 0]]) # Example payoff matrix for Player 2
population_size = 100
num_generations = 1000
random_seed = random.randrange(0, 1000) # Add a random_seed value as constant to generate same results in the evolution
random_seed = random.randrange(
0, 1000
) # Add a random_seed value as constant to generate same results in the evolution

# Run imitation dynamics multiple times and collect the results
results = []
for i in range(100):  # Run 100 iterations
# Run imitation dynamics with a constant random seed (same initialization each run)
nash_equilibrium_player1, nash_equilibrium_player2 = next(imitation_dynamics(A, B, population_size, num_generations, random_seed))
results.append((tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2))) # Convert numpy arrays to tuples
nash_equilibrium_player1, nash_equilibrium_player2 = next(
imitation_dynamics(A, B, population_size, num_generations, random_seed)
)
results.append(
(tuple(nash_equilibrium_player1), tuple(nash_equilibrium_player2))
) # Convert numpy arrays to tuples
# Check if the results are different in at least one pair of iterations
self.assertTrue(len(set(results)) == 1, "Results are randomly generated")

if __name__ == '__main__':
unittest.main()

if __name__ == "__main__":
unittest.main()
53 changes: 38 additions & 15 deletions tests/unit/test_regret_minimization.py
@@ -2,39 +2,62 @@
import numpy as np
from nashpy.algorithms.regret_minimization import regret_minimization


class TestRegretMinimization(unittest.TestCase):

def test_regret_minimization_for_zerosum_game(self):
# Test case values
A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]]) # Example payoff matrix for Player A
A = np.array(
[[0, -1, 1], [1, 0, -1], [-1, 1, 0]]
) # Example payoff matrix for Player A
B = -A # Example payoff matrix for Player B ( Zero Sum Game )
learning_rate = 0.1
max_iterations = 100
expected_nash_equilibrium_A = np.array([0.33333333, 0.33333333, 0.33333333]) # Expected Nash equilibrium strategy for Player 1
expected_nash_equilibrium_B = np.array([0.33333333, 0.33333333, 0.33333333]) # Expected Nash equilibrium strategy for Player 2
expected_nash_equilibrium_A = np.array(
[0.33333333, 0.33333333, 0.33333333]
) # Expected Nash equilibrium strategy for Player 1
expected_nash_equilibrium_B = np.array(
[0.33333333, 0.33333333, 0.33333333]
) # Expected Nash equilibrium strategy for Player 2

# Execute the regret minimization algorithm
actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(regret_minimization(A, B, learning_rate, max_iterations))
actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(
regret_minimization(A, B, learning_rate, max_iterations)
)

# Assert if the actual Nash equilibrium strategies match the expected strategies
self.assertTrue(np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A))
self.assertTrue(np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B))
self.assertTrue(
np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A)
)
self.assertTrue(
np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B)
)

def test_regret_minimization_non_zerosum_game(self):
# Test case values
A = np.array([[3, -1,3], [-1, 3,6], [-1, 1, 2]])
B = np.array([[-3, 1,4], [1, -3,3], [-1, 3, 4]])
A = np.array([[3, -1, 3], [-1, 3, 6], [-1, 1, 2]])
B = np.array([[-3, 1, 4], [1, -3, 3], [-1, 3, 4]])
learning_rate = 0.1
max_iterations = 100

expected_nash_equilibrium_A = np.array([0.0, 1.0, 0.0]) # Expected Nash equilibrium strategy for Player 1
expected_nash_equilibrium_B = np.array([0.0, 0.0, 1.0]) # Expected Nash equilibrium strategy for Player 2
expected_nash_equilibrium_A = np.array(
[0.0, 1.0, 0.0]
) # Expected Nash equilibrium strategy for Player 1
expected_nash_equilibrium_B = np.array(
[0.0, 0.0, 1.0]
) # Expected Nash equilibrium strategy for Player 2
# Execute the regret minimization algorithm
actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(regret_minimization(A, B, learning_rate, max_iterations))
actual_nash_equilibrium_A, actual_nash_equilibrium_B = next(
regret_minimization(A, B, learning_rate, max_iterations)
)

# Assert if the actual Nash equilibrium strategies match the expected strategies
self.assertTrue(np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A))
self.assertTrue(np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B))
self.assertTrue(
np.allclose(actual_nash_equilibrium_A, expected_nash_equilibrium_A)
)
self.assertTrue(
np.allclose(actual_nash_equilibrium_B, expected_nash_equilibrium_B)
)


if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
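
The reformatted test modules are plain unittest test cases, so they can be run from the repository root with the standard library runner, for example "python -m unittest tests.unit.test_regret_minimization tests.unit.test_imitation_dynamics" (assuming the package and test directories are importable); pytest would also collect them unchanged.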
