Implement logit maxregret parameter in Python.

tturocy committed Mar 26, 2024
1 parent 68a4e7e commit 3db4dd4
Showing 9 changed files with 46 additions and 31 deletions.
18 changes: 10 additions & 8 deletions doc/tools.logit.rst
@@ -33,6 +33,13 @@ if an information set is not reached due to being the successor of chance
moves with zero probability. In such games, the implementation treats
the beliefs at such information sets as being uniform across all member nodes.

+.. versionchanged:: 16.2.0
+
+   The criterion for deciding whether a point is sufficiently close to a
+   Nash equilibrium to terminate the path-following is specified
+   in terms of the maximum regret. This regret is interpreted as a fraction
+   of the difference between the maximum and minimum payoffs in the game.
+
.. program:: gambit-logit

.. cmdoption:: -d
@@ -61,14 +68,9 @@ the beliefs at such information sets as being uniform across all member nodes.

.. cmdoption:: -m

-   Stop when reaching the specified value of the
-   parameter lambda. By default, the tracing stops when lambda reaches
-   1,000,000, which is usually suitable for computing a good
-   approximation to a Nash equilibrium. For applications, such as to
-   laboratory experiments, where the behavior of the correspondence for
-   small values of lambda is of interest and the asymptotic behavior is
-   not relevant, setting MAXLAMBDA to a much smaller value may be
-   indicated.
+   .. versionadded:: 16.2.0
+
+   Specify the maximum regret criterion for acceptance as an approximate
+   Nash equilibrium.

.. cmdoption:: -l

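The versionchanged note above defines acceptance in terms of maximum regret relative to the game's payoff range. A minimal Python sketch of that criterion, assuming a hypothetical payoff_range helper and that max_regret() (the profile method exercised by the updated test at the end of this commit) reports regret in raw payoff units:

import pygambit as gbt

def payoff_range(game: gbt.Game) -> float:
    # Hypothetical helper: difference between the maximum and minimum
    # payoffs occurring in the game.
    payoffs = [
        float(outcome[player])
        for outcome in game.outcomes
        for player in game.players
    ]
    return max(payoffs) - min(payoffs)

def accept(profile, maxregret: float) -> bool:
    # Accept as an approximate Nash equilibrium once the largest regret of
    # any player is no more than maxregret times the payoff range.
    return profile.max_regret() <= maxregret * payoff_range(profile.game)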
4 changes: 2 additions & 2 deletions src/pygambit/gambit.pxd
@@ -455,10 +455,10 @@ cdef extern from "solvers/gnm/gnm.h":
) except +RuntimeError

cdef extern from "solvers/logit/nfglogit.h":
-    c_List[c_MixedStrategyProfileDouble] LogitStrategySolve(c_Game) except +RuntimeError
+    c_List[c_MixedStrategyProfileDouble] LogitStrategySolve(c_Game, double) except +RuntimeError

cdef extern from "solvers/logit/efglogit.h":
-    c_List[c_MixedBehaviorProfileDouble] LogitBehaviorSolve(c_Game) except +RuntimeError
+    c_List[c_MixedBehaviorProfileDouble] LogitBehaviorSolve(c_Game, double) except +RuntimeError

cdef extern from "solvers/logit/nfglogit.h":
    cdef cppclass c_LogitQREMixedStrategyProfile "LogitQREMixedStrategyProfile":
5 changes: 2 additions & 3 deletions src/pygambit/nash.h
@@ -53,12 +53,11 @@ std::shared_ptr<LogitQREMixedStrategyProfile> logit_atlambda(const Game &p_game,
alg.SolveAtLambda(start, null_stream, p_lambda, 1.0));
}

-List<LogitQREMixedStrategyProfile> logit_principal_branch(const Game &p_game,
-                                                          double p_maxLambda = 1000000.0)
+List<LogitQREMixedStrategyProfile> logit_principal_branch(const Game &p_game, double p_maxLambda)
{
  LogitQREMixedStrategyProfile start(p_game);
  StrategicQREPathTracer alg;
  NullBuffer null_buffer;
  std::ostream null_stream(&null_buffer);
-  return alg.TraceStrategicPath(start, null_stream, p_maxLambda, 1.0);
+  return alg.TraceStrategicPath(start, null_stream, p_maxLambda, 0.0, 1.0);
}
9 changes: 5 additions & 4 deletions src/pygambit/nash.pxi
@@ -33,6 +33,7 @@ def _convert_mspd(
) -> typing.List[MixedStrategyProfileDouble]:
    ret = []
    for i in range(inlist.Length()):
+        print(i)
        p = MixedStrategyProfileDouble()
        p.profile = copyitem_list_mspd(inlist, i+1)
        ret.append(p)
@@ -182,12 +183,12 @@ def _gnm_strategy_solve(
raise


-def _logit_strategy_solve(game: Game) -> typing.List[MixedStrategyProfileDouble]:
-    return _convert_mspd(LogitStrategySolve(game.game))
+def _logit_strategy_solve(game: Game, maxregret: float) -> typing.List[MixedStrategyProfileDouble]:
+    return _convert_mspd(LogitStrategySolve(game.game, maxregret))


-def _logit_behavior_solve(game: Game) -> typing.List[MixedBehaviorProfileDouble]:
-    return _convert_mbpd(LogitBehaviorSolve(game.game))
+def _logit_behavior_solve(game: Game, maxregret: float) -> typing.List[MixedBehaviorProfileDouble]:
+    return _convert_mbpd(LogitBehaviorSolve(game.game, maxregret))


@cython.cclass
18 changes: 15 additions & 3 deletions src/pygambit/nash.py
@@ -516,7 +516,9 @@ def gnm_solve(


def logit_solve(
-    game: libgbt.Game, use_strategic: bool = False
+    game: libgbt.Game,
+    use_strategic: bool = False,
+    maxregret: float = 0.0001,
) -> NashComputationResult:
"""Compute Nash equilibria of a game using :ref:`the logit quantal response
equilibrium correspondence <gambit-logit>`.
@@ -528,19 +530,29 @@
    ----------
    game : Game
        The game to compute equilibria in.
+    use_strategic : bool, default False
+        Whether to use the strategic form. If True, always uses the strategic
+        representation even if the game's native representation is extensive.
+    maxregret : float, default 0.0001
+        The acceptance criterion for approximate Nash equilibrium; the maximum
+        regret of any player must be no more than `maxregret` times the
+        difference of the maximum and minimum payoffs of the game.
+
+        .. versionadded:: 16.2.0
+
    Returns
    -------
    res : NashComputationResult
        The result represented as a ``NashComputationResult`` object.
    """
+    if maxregret <= 0.0:
+        raise ValueError("logit_solve(): maxregret argument must be positive")
    if not game.is_tree or use_strategic:
-        equilibria = libgbt._logit_strategy_solve(game)
+        equilibria = libgbt._logit_strategy_solve(game, maxregret)
    else:
-        equilibria = libgbt._logit_behavior_solve(game)
+        equilibria = libgbt._logit_behavior_solve(game, maxregret)
    return NashComputationResult(
        game=game,
        method="logit",
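A short usage sketch of the new keyword argument (the game file name is a placeholder; the function and parameter names come from this diff):

import pygambit as gbt

g = gbt.Game.read_game("mygame.efg")  # placeholder game file
# Accept once no player's regret exceeds 1e-6 of the game's payoff range.
result = gbt.nash.logit_solve(g, use_strategic=False, maxregret=1.0e-6)
for eqm in result.equilibria:
    print(eqm.max_regret())

# The new validation rejects non-positive tolerances:
# gbt.nash.logit_solve(g, maxregret=0.0)  # raises ValueError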
7 changes: 4 additions & 3 deletions src/solvers/logit/path.cc
@@ -138,9 +138,6 @@ void PathTracer::TracePath(const EquationSystem &p_system, Vector<double> &x, do
  q.GetRow(q.NumRows(), t);

  while (x[x.Length()] >= 0.0 && x[x.Length()] < p_maxLambda) {
-    if (p_terminate(x)) {
-      return;
-    }
    bool accept = true;

    if (fabs(h) <= c_hmin) {
@@ -243,6 +240,10 @@

    // PC step was successful; update and iterate
    x = u;
+    if (p_terminate(x)) {
+      p_callback(x, true);
+      return;
+    }
    p_callback(x, false);

if (t * newT < 0.0) {
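The two hunks above relocate the termination test: rather than checking at the top of the loop, the tracer now checks immediately after an accepted predictor-corrector step and reports the terminal point to the callback (flag set to true) before returning, so the accepted point is not silently dropped. A Python pseudocode sketch of the revised control flow, with the step routine passed in as a hypothetical callable:

def trace_path(x, max_lambda, terminate, callback, pc_step):
    # x is the current point on the QRE path; x[-1] is the lambda coordinate.
    while 0.0 <= x[-1] < max_lambda:
        accepted, u = pc_step(x)     # one predictor-corrector step
        if not accepted:
            continue                 # real code shrinks the stepsize and retries
        x = u                        # step accepted; update the point
        if terminate(x):             # new placement of the termination test
            callback(x, True)        # report the terminal point...
            return                   # ...then stop tracing
        callback(x, False)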
6 changes: 3 additions & 3 deletions src/tools/liap/liap.cc
@@ -142,16 +142,16 @@ int main(int argc, char *argv[])
{"verbose", 0, nullptr, 'V'},
{nullptr, 0, nullptr, 0}};
int c;
-  while ((c = getopt_long(argc, argv, "d:n:i:s:hqVvS", long_options, &long_opt_index)) != -1) {
+  while ((c = getopt_long(argc, argv, "d:n:i:s:m:hqVvS", long_options, &long_opt_index)) != -1) {
    switch (c) {
    case 'v':
      PrintBanner(std::cerr);
      exit(1);
    case 'd':
      numDecimals = atoi(optarg);
      break;
-    case 'n':
-      numTries = atoi(optarg);
+    case 'm':
+      maxregret = atof(optarg);
      break;
case 'i':
maxitsN = atoi(optarg);
5 changes: 3 additions & 2 deletions src/tools/logit/logit.cc
@@ -49,7 +49,8 @@ void PrintHelp(char *progname)
std::cerr << " -d DECIMALS show equilibria as floating point with DECIMALS digits\n";
std::cerr << " -s STEP initial stepsize (default is .03)\n";
std::cerr << " -a ACCEL maximum acceleration (default is 1.1)\n";
std::cerr << " -m MAXLAMBDA stop when reaching MAXLAMBDA (default is 1000000)\n";
std::cerr << " -m MAXREGRET maximum regret acceptable as a proportion of range of\n";
std::cerr << " payoffs in the game\n";
std::cerr << " -l LAMBDA compute QRE at `lambda` accurately\n";
std::cerr << " -L FILE compute maximum likelihood estimates;\n";
std::cerr << " read strategy frequencies from FILE\n";
@@ -120,7 +121,7 @@ int main(int argc, char *argv[])
      maxDecel = atof(optarg);
      break;
    case 'm':
-      maxLambda = atof(optarg);
+      maxregret = atof(optarg);
      break;
    case 'e':
      fullGraph = false;
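For completeness, one way to exercise the revised flag from Python (the game file name and tolerance are illustrative; gambit-logit reads the game from standard input):

import subprocess

# -m now bounds the maximum regret as a proportion of the payoff range,
# replacing its former meaning of MAXLAMBDA.
with open("game.nfg") as f:  # placeholder game file
    proc = subprocess.run(
        ["gambit-logit", "-m", "1.0e-6"],
        stdin=f, capture_output=True, text=True, check=True,
    )
print(proc.stdout)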
5 changes: 2 additions & 3 deletions tests/test_nash.py
@@ -141,6 +141,5 @@ def test_logit_zerochance():
    g.append_infoset(g.root.children[1], g.root.children[0].infoset)
    win = g.add_outcome([1])
    g.set_outcome(g.root.children[0].children[0], win)
-    result = gbt.nash.logit_solve(g, use_strategic=False)
-    assert result.equilibria[0][g.players["Alice"].infosets[0].actions[0]] == 1
-    assert result.equilibria[0][g.players["Alice"].infosets[0].actions[1]] == 0
+    result = gbt.nash.logit_solve(g, use_strategic=False, maxregret=0.0001)
+    assert result.equilibria[0].max_regret() < 0.0001
