
Commit

Add docs for nash averaging
jma127 committed Feb 6, 2019
1 parent b42aefe commit 2ddbcf7
Showing 1 changed file with 25 additions and 3 deletions.
28 changes: 25 additions & 3 deletions pybrium/nash_averaging.py
@@ -4,14 +4,36 @@
 
 
 def nash_average(payoffs, steps=1000000, tol=1e-6, lams=None, lr=None):
+    """Calculates the mixed Nash equilibrium strategy and the resulting
+    Nash average, as described by Balduzzi et al. (2018).
+
+    Args:
+        payoffs (torch.Tensor):
+            Antisymmetric payoff matrix.
+        steps (int, optional):
+            Number of SGD steps to use in calculations (default: 1000000).
+        tol (float, optional):
+            Tolerance for asymmetries (default: 1e-6).
+        lams (torch.Tensor, optional):
+            Initialization logits (default: auto-initialized).
+        lr (float, optional):
+            SGD learning rate (default: auto-computed).
+
+    Returns:
+        Tuple[torch.Tensor, torch.Tensor] whose first element is the mixed Nash
+        equilibrium strategy with maximum entropy, and whose second element is
+        the Nash averaging skill ratings.
+    Balduzzi et al., "Re-evaluating Evaluation", 2018,
+    https://arxiv.org/abs/1806.02643
+    """
     assert payoffs.dim() == 2 and payoffs.size(0) == payoffs.size(1)
 
     payoff_stack = torch.stack([payoffs, -payoffs])
 
     solution = solve_maxent_ce(payoff_stack, steps=steps, lams=lams, lr=lr)
-    reduced_policy = solution.sum(dim=1)
-    nash_avg = torch.mm(payoffs, reduced_policy.view(-1, 1))
+    reduced_strategy = solution.sum(dim=1)
+    nash_avg = torch.mm(payoffs, reduced_strategy.view(-1, 1))
 
     assert (torch.abs(solution - solution.transpose(0, 1)) < tol).all()
 
-    return reduced_policy, nash_avg
+    return reduced_strategy, nash_avg
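
As a usage illustration (not part of this commit), the sketch below shows how the documented function might be called. It assumes nash_average is importable from pybrium.nash_averaging as shown in the file path above; the payoff values are made up for the example.

    import torch

    from pybrium.nash_averaging import nash_average

    # Antisymmetric payoff matrix for three agents: entry (i, j) is the expected
    # payoff of agent i against agent j, so payoffs[i, j] == -payoffs[j, i].
    payoffs = torch.tensor([[ 0.0,  1.0, -0.5],
                            [-1.0,  0.0,  0.3],
                            [ 0.5, -0.3,  0.0]])

    strategy, skill = nash_average(payoffs)

    print(strategy)  # maximum-entropy mixed Nash equilibrium over the three agents
    print(skill)     # Nash averaging skill ratings, one per agent (shape [3, 1])

Per the function body, the skill ratings are simply the payoff matrix multiplied by the equilibrium strategy.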
