Merge pull request #1001 from BStarcheus:dcfr_bugfix

PiperOrigin-RevId: 503951173
Change-Id: Ic7e021718e966188786bfdce9f96069a67199d8e
google-deepmind · Jan 23, 2023 · 39f77e3
2 parents bbe0007 + 6ac1cac
commit 39f77e3
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 2 deletions.
diff --git a/open_spiel/colabs/deep_cfr_pytorch.ipynb b/open_spiel/colabs/deep_cfr_pytorch.ipynb
@@ -354,7 +354,8 @@
         "      return state.returns()[player]\n",
         "    elif state.is_chance_node():\n",
         "      # If this is a chance node, sample an action\n",
-        "      action = np.random.choice([i[0] for i in state.chance_outcomes()])\n",
+        "      chance_outcome, chance_proba = zip(*state.chance_outcomes())\n",
+        "      action = np.random.choice(chance_outcome, p=chance_proba)\n",
         "      return self._traverse_game_tree(state.child(action), player)\n",
         "    elif state.current_player() == player:\n",
         "      sampled_regret = collections.defaultdict(float)\n",

diff --git a/open_spiel/python/algorithms/deep_cfr_tf2.py b/open_spiel/python/algorithms/deep_cfr_tf2.py
@@ -558,7 +558,8 @@ def _traverse_game_tree(self, state, player):
       return state.returns()[player]
     elif state.is_chance_node():
       # If this is a chance node, sample an action
-      action = np.random.choice([i[0] for i in state.chance_outcomes()])
+      chance_outcome, chance_proba = zip(*state.chance_outcomes())
+      action = np.random.choice(chance_outcome, p=chance_proba)
       return self._traverse_game_tree(state.child(action), player)
     elif state.current_player() == player:
       # Update the policy over the info set & actions via regret matching.