Merge 25cf7b7 into 4176da8

bcollazo · Jan 23, 2022 · a3f0d33 · a3f0d33
2 parents 4176da8 + 25cf7b7
commit a3f0d33
Show file tree

Hide file tree

Showing 10 changed files with 157 additions and 49 deletions.
diff --git a/catanatron_core/catanatron/game.py b/catanatron_core/catanatron/game.py
@@ -53,6 +53,8 @@ def __init__(
         self,
         players: Iterable[Player],
         seed: int = None,
+        discard_limit: int = 7,
+        vps_to_win: int = 10,
         catan_map: BaseMap = None,
         initialize: bool = True,
     ):
@@ -61,6 +63,8 @@ def __init__(
         Args:
             players (Iterable[Player]): list of players, should be at most 4.
             seed (int, optional): Random seed to use (for reproducing games). Defaults to None.
+            discard_limit (int, optional): Discard limit to use. Defaults to 7.
+            vps_to_win (int, optional): Victory Points needed to win. Defaults to 10.
             catan_map (BaseMap, optional): Map configuration to use. Defaults to None.
             initialize (bool, optional): Whether to initialize. Defaults to True.
         """
@@ -69,7 +73,10 @@ def __init__(
             random.seed(self.seed)
 
             self.id = str(uuid.uuid4())
-            self.state = State(players, catan_map or BaseMap())
+            self.vps_to_win = vps_to_win
+            self.state = State(
+                players, catan_map or BaseMap(), discard_limit=discard_limit
+            )
 
     def play(self, accumulators=[], decide_fn=None):
         """Executes game until a player wins or exceeded TURNS_LIMIT.
@@ -132,7 +139,10 @@ def winning_color(self) -> Union[Color, None]:
         winning_player = None
         for player in self.state.players:
             key = player_key(self.state, player.color)
-            if self.state.player_state[f"{key}_ACTUAL_VICTORY_POINTS"] >= 10:
+            if (
+                self.state.player_state[f"{key}_ACTUAL_VICTORY_POINTS"]
+                >= self.vps_to_win
+            ):
                 winning_player = player
 
         return None if winning_player is None else winning_player.color
@@ -147,5 +157,6 @@ def copy(self) -> "Game":
         game_copy = Game([], None, None, initialize=False)
         game_copy.seed = self.seed
         game_copy.id = self.id
+        game_copy.vps_to_win = self.vps_to_win
         game_copy.state = self.state.copy()
         return game_copy
diff --git a/catanatron_core/catanatron/state.py b/catanatron_core/catanatron/state.py
@@ -44,11 +44,6 @@
     player_resource_deck_contains,
 )
 
-# For now have some Game-Configuration aspects hard-coded here
-#   eventually it might become part of an immutable (not copied)
-#   property for game-config in state.
-DISCARD_LIMIT = 7
-
 # These will be prefixed by P0_, P1_, ...
 # Create Player State blueprint
 PLAYER_INITIAL_STATE = {
@@ -111,10 +106,11 @@ class State:
         playable_actions (List[Action]): List of playable actions by current player.
     """
 
-    def __init__(self, players, catan_map=None, initialize=True):
+    def __init__(self, players, catan_map=None, discard_limit=7, initialize=True):
         if initialize:
             self.players = random.sample(players, len(players))
             self.board = Board(catan_map or BaseMap())
+            self.discard_limit = discard_limit
 
             # feature-ready dictionary
             self.player_state = dict()
@@ -155,19 +151,20 @@ def current_player(self):
 
     def copy(self):
         """Creates a copy of this State class that can be modified without
-        repercusions to this one.
+        repercussions to this one. Immutable values are shared, not duplicated.
 
         Returns:
             State: State copy.
         """
         state_copy = State(None, None, initialize=False)
         state_copy.players = self.players
+        state_copy.discard_limit = self.discard_limit  # immutable
 
         state_copy.board = self.board.copy()
 
         state_copy.player_state = self.player_state.copy()
         state_copy.color_to_index = self.color_to_index
-        state_copy.colors = self.colors  # immutable, so no need to copy
+        state_copy.colors = self.colors  # immutable
 
         # TODO: Move Deck to functional code, so as to quick-copy arrays.
         state_copy.resource_deck = pickle.loads(pickle.dumps(self.resource_deck))
@@ -415,7 +412,7 @@ def apply_action(state: State, action: Action):
 
         if number == 7:
             discarders = [
-                player_num_resource_cards(state, color) > DISCARD_LIMIT
+                player_num_resource_cards(state, color) > state.discard_limit
                 for color in state.colors
             ]
             is_discarding = any(discarders)

diff --git a/catanatron_experimental/catanatron_experimental/alphatan/pit.py b/catanatron_experimental/catanatron_experimental/alphatan/pit.py
@@ -18,4 +18,4 @@
     # AlphaTan(Color.BLUE, uuid.uuid4(), model, temp=0, num_simulations=10),
     AlphaTan(Color.RED, uuid.uuid4(), model, temp=0, num_simulations=10),
 ]
-wins, vp_history = play_batch(10, players, None, save_in_db, False)
+wins, vp_history = play_batch(10, players)
diff --git a/catanatron_experimental/catanatron_experimental/bayesian.py b/catanatron_experimental/catanatron_experimental/bayesian.py
@@ -43,9 +43,7 @@ def black_box_function(a, b, c, d, e, f, g, h, i, j, k, l):
         ValueFunctionPlayer(Color.RED, "C", params=DEFAULT_WEIGHTS),
         ValueFunctionPlayer(Color.BLUE, "C", params=weights),
     ]
-    wins, results_by_player = play_batch(
-        100, players, None, False, False, verbose="ERROR"
-    )
+    wins, results_by_player = play_batch(100, players)
     vps = results_by_player[players[1].color]
     avg_vps = sum(vps) / len(vps)
     return 100 * wins[players[1].color] + avg_vps

diff --git a/catanatron_experimental/catanatron_experimental/machine_learning/players/minimax.py b/catanatron_experimental/catanatron_experimental/machine_learning/players/minimax.py
@@ -1,4 +1,5 @@
 import time
+import random
 
 from catanatron.state_functions import (
     get_longest_road_length,
@@ -67,17 +68,23 @@ class ValueFunctionPlayer(Player):
     For now, the base value function only considers 1 enemy player.
     """
 
-    def __init__(self, color, value_fn_builder_name=None, params=None, is_bot=True):
+    def __init__(
+        self, color, value_fn_builder_name=None, params=None, is_bot=True, epsilon=None
+    ):
         super().__init__(color, is_bot)
         self.value_fn_builder_name = (
             "contender_fn" if value_fn_builder_name == "C" else "base_fn"
         )
         self.params = params
+        self.epsilon = epsilon
 
     def decide(self, game, playable_actions):
         if len(playable_actions) == 1:
             return playable_actions[0]
 
+        if self.epsilon is not None and random.random() < self.epsilon:
+            return random.choice(playable_actions)
+
         best_value = float("-inf")
         best_action = None
         for action in playable_actions:
@@ -155,6 +162,13 @@ def fn(game, p0_color):
             params["longest_road"] if num_buildable_nodes == 0 else 0.1
         )
 
+        # TODO: Do roads harmonize? That is, what is the length of the longest
+        # _possible_ road? (To try to keep the bot from building extraneous roads)
+
+        # TODO: Weight longest road much lower if in early game.
+
+        # TODO: Weight monopoly higher later in the game.
+
         return float(
             game.state.player_state[f"{key}_VICTORY_POINTS"] * params["public_vps"]
             + production * params["production"]
@@ -215,6 +229,7 @@ def __init__(
         prunning=False,
         value_fn_builder_name=None,
         params=DEFAULT_WEIGHTS,
+        epsilon=None,
     ):
         super().__init__(color)
         self.depth = int(depth)
@@ -224,6 +239,7 @@ def __init__(
         )
         self.params = params
         self.use_value_function = None
+        self.epsilon = epsilon
 
     def value_function(self, game, p0_color):
         raise NotImplementedError
@@ -238,6 +254,9 @@ def decide(self, game: Game, playable_actions):
         if len(actions) == 1:
             return actions[0]
 
+        if self.epsilon is not None and random.random() < self.epsilon:
+            return random.choice(playable_actions)
+
         start = time.time()
         state_id = str(len(game.state.actions))
         node = DebugStateNode(state_id, self.color)  # i think it comes from outside

diff --git a/catanatron_experimental/catanatron_experimental/optunation.py b/catanatron_experimental/catanatron_experimental/optunation.py
@@ -39,9 +39,7 @@ def objective(trial):
         ValueFunctionPlayer(Color.RED, "C", params=DEFAULT_WEIGHTS),
         ValueFunctionPlayer(Color.BLUE, "C", params=weights),
     ]
-    wins, results_by_player = play_batch(
-        200, players, None, False, False, loglevel="ERROR"
-    )
+    wins, results_by_player = play_batch(200, players)
     vps = results_by_player[players[1].color]
     avg_vps = sum(vps) / len(vps)
     return 1000 * wins[players[1].color] + avg_vps

diff --git a/catanatron_experimental/catanatron_experimental/play.py b/catanatron_experimental/catanatron_experimental/play.py
@@ -1,4 +1,5 @@
 import os
+from dataclasses import dataclass
 
 import click
 from rich.console import Console
@@ -87,9 +88,20 @@ def render(self, task):
         """,
 )
 @click.option(
-    "--quiet/--no-quiet",
+    "--config-discard-limit",
+    default=7,
+    help="Sets Discard Limit to use in games.",
+)
+@click.option(
+    "--config-vps-to-win",
+    default=10,
+    help="Sets Victory Points needed to win games.",
+)
+@click.option(
+    "--quiet",
     default=False,
-    help="Whether to print results to the console",
+    is_flag=True,
+    help="Silence console output. Useful for debugging.",
 )
 @click.option(
     "--help-players",
@@ -98,7 +110,18 @@ def render(self, task):
     help="Show player codes and exits.",
     is_flag=True,
 )
-def simulate(num, players, output, json, csv, db, quiet, help_players):
+def simulate(
+    num,
+    players,
+    output,
+    json,
+    csv,
+    db,
+    config_discard_limit,
+    config_vps_to_win,
+    quiet,
+    help_players,
+):
     """
     Catan Bot Simulator.
     Catanatron allows you to simulate millions of games at scale
@@ -128,7 +151,31 @@ def simulate(num, players, output, json, csv, db, quiet, help_players):
                 players.append(player)
                 break
 
-    play_batch(num, players, output, json, csv, db, quiet)
+    output_options = OutputOptions(output, csv, json, db)
+    game_config = GameConfigOptions(config_discard_limit, config_vps_to_win)
+    play_batch(
+        num,
+        players,
+        output_options,
+        game_config,
+        quiet,
+    )
+
+
+@dataclass(frozen=True)
+class OutputOptions:
+    """Class to keep track of output CLI flags; every field is optional so OutputOptions() works"""

+    output: str = None  # path to store files; None (the default) means no output directory
+    csv: bool = False
+    json: bool = False
+    db: bool = False
+
+
+@dataclass(frozen=True)
+class GameConfigOptions:  # rule settings that play_batch_core forwards to Game(...)
+    discard_limit: int = 7  # on a 7 roll, players holding more cards than this must discard
+    vps_to_win: int = 10  # actual victory points at which winning_color() reports a winner
 
 
 COLOR_TO_RICH_STYLE = {
@@ -151,38 +198,43 @@ def rich_color(color):
     return f"[{style}]{color.value}[/{style}]"
 
 
-def play_batch_core(num_games, players, accumulators=[]):
+def play_batch_core(num_games, players, game_config, accumulators=[]):
     for _ in range(num_games):
         for player in players:
             player.reset_state()
-        game = Game(players)
+        game = Game(
+            players,
+            discard_limit=game_config.discard_limit,
+            vps_to_win=game_config.vps_to_win,
+        )
         game.play(accumulators)
         yield game
 
 
 def play_batch(
     num_games,
     players,
-    output=None,
-    json=False,
-    csv=False,
-    db=False,
+    output_options=None,
+    game_config=None,
     quiet=False,
 ):
+    output_options = output_options or OutputOptions()
+    game_config = game_config or GameConfigOptions()
+
     statistics_accumulator = StatisticsAccumulator()
     vp_accumulator = VpDistributionAccumulator()
     accumulators = [statistics_accumulator, vp_accumulator]
-    if output:
-        ensure_dir(output)
-    if output and csv:
-        accumulators.append(CsvDataAccumulator(output))
-    if output and json:
-        accumulators.append(JsonDataAccumulator(output))
-    if db:
+    if output_options.output:
+        ensure_dir(output_options.output)
+    if output_options.output and output_options.csv:
+        accumulators.append(CsvDataAccumulator(output_options.output))
+    if output_options.output and output_options.json:
+        accumulators.append(JsonDataAccumulator(output_options.output))
+    if output_options.db:
         accumulators.append(DatabaseAccumulator())
 
     if quiet:
-        for _ in play_batch_core(num_games, players, accumulators):
+        for _ in play_batch_core(num_games, players, game_config, accumulators):
             pass
         return
 
@@ -196,7 +248,7 @@ def play_batch(
     for player in players:
         table.add_column(f"{player.color.value} VP", justify="right")
     table.add_column("WINNER")
-    if db:
+    if output_options.db:
         table.add_column("LINK", overflow="fold")
 
     with Progress(
@@ -214,7 +266,9 @@ def play_batch(
             for player in players
         ]
 
-        for i, game in enumerate(play_batch_core(num_games, players, accumulators)):
+        for i, game in enumerate(
+            play_batch_core(num_games, players, game_config, accumulators)
+        ):
             winning_color = game.winning_color()
 
             if (num_games - last_n) < (i + 1):
@@ -228,7 +282,7 @@ def play_batch(
                     points = get_actual_victory_points(game.state, player.color)
                     row.append(str(points))
                 row.append(rich_color(winning_color))
-                if db:
+                if output_options.db:
                     row.append(accumulators[-1].link)
 
                 table.add_row(*row)
@@ -284,8 +338,8 @@ def play_batch(
     table.add_row(avg_ticks, avg_turns, avg_duration)
     console.print(table)
 
-    if output and csv:
-        console.print(f"GZIP CSVs saved at: [green]{output}[/green]")
+    if output_options.output and output_options.csv:
+        console.print(f"GZIP CSVs saved at: [green]{output_options.output}[/green]")
 
     return (
         dict(statistics_accumulator.wins),

diff --git a/catanatron_experimental/catanatron_experimental/rayopt.py b/catanatron_experimental/catanatron_experimental/rayopt.py
@@ -18,9 +18,7 @@ def objective(config):
         ValueFunctionPlayer(Color.RED, "C", params=DEFAULT_WEIGHTS),
         ValueFunctionPlayer(Color.BLUE, "C", params=config),
     ]
-    wins, results_by_player = play_batch(
-        100, players, None, False, False, loglevel="ERROR"
-    )
+    wins, results_by_player = play_batch(100, players)
     vps = results_by_player[players[1].color]
     avg_vps = sum(vps) / len(vps)
     score = 100 * wins[players[1].color] + avg_vps