Skip to content

Commit

Permalink
Merge 25cf7b7 into 4176da8
Browse files Browse the repository at this point in the history
  • Loading branch information
bcollazo committed Jan 23, 2022
2 parents 4176da8 + 25cf7b7 commit a3f0d33
Show file tree
Hide file tree
Showing 10 changed files with 157 additions and 49 deletions.
15 changes: 13 additions & 2 deletions catanatron_core/catanatron/game.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ def __init__(
self,
players: Iterable[Player],
seed: int = None,
discard_limit: int = 7,
vps_to_win: int = 10,
catan_map: BaseMap = None,
initialize: bool = True,
):
Expand All @@ -61,6 +63,8 @@ def __init__(
Args:
players (Iterable[Player]): list of players, should be at most 4.
seed (int, optional): Random seed to use (for reproducing games). Defaults to None.
discard_limit (int, optional): Discard limit to use. Defaults to 7.
vps_to_win (int, optional): Victory Points needed to win. Defaults to 10.
catan_map (BaseMap, optional): Map configuration to use. Defaults to None.
initialize (bool, optional): Whether to initialize. Defaults to True.
"""
Expand All @@ -69,7 +73,10 @@ def __init__(
random.seed(self.seed)

self.id = str(uuid.uuid4())
self.state = State(players, catan_map or BaseMap())
self.vps_to_win = vps_to_win
self.state = State(
players, catan_map or BaseMap(), discard_limit=discard_limit
)

def play(self, accumulators=[], decide_fn=None):
"""Executes game until a player wins or exceeded TURNS_LIMIT.
Expand Down Expand Up @@ -132,7 +139,10 @@ def winning_color(self) -> Union[Color, None]:
winning_player = None
for player in self.state.players:
key = player_key(self.state, player.color)
if self.state.player_state[f"{key}_ACTUAL_VICTORY_POINTS"] >= 10:
if (
self.state.player_state[f"{key}_ACTUAL_VICTORY_POINTS"]
>= self.vps_to_win
):
winning_player = player

return None if winning_player is None else winning_player.color
Expand All @@ -147,5 +157,6 @@ def copy(self) -> "Game":
game_copy = Game([], None, None, initialize=False)
game_copy.seed = self.seed
game_copy.id = self.id
game_copy.vps_to_win = self.vps_to_win
game_copy.state = self.state.copy()
return game_copy
15 changes: 6 additions & 9 deletions catanatron_core/catanatron/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@
player_resource_deck_contains,
)

# For now have some Game-Configuration aspects hard-coded here
# eventually it might become part of an immutable (not copied)
# property for game-config in state.
DISCARD_LIMIT = 7

# These will be prefixed by P0_, P1_, ...
# Create Player State blueprint
PLAYER_INITIAL_STATE = {
Expand Down Expand Up @@ -111,10 +106,11 @@ class State:
playable_actions (List[Action]): List of playable actions by current player.
"""

def __init__(self, players, catan_map=None, initialize=True):
def __init__(self, players, catan_map=None, discard_limit=7, initialize=True):
if initialize:
self.players = random.sample(players, len(players))
self.board = Board(catan_map or BaseMap())
self.discard_limit = discard_limit

# feature-ready dictionary
self.player_state = dict()
Expand Down Expand Up @@ -155,19 +151,20 @@ def current_player(self):

def copy(self):
"""Creates a copy of this State class that can be modified without
repercusions to this one.
repercussions to this one. Immutable values are just copied over.
Returns:
State: State copy.
"""
state_copy = State(None, None, initialize=False)
state_copy.players = self.players
state_copy.discard_limit = self.discard_limit # immutable

state_copy.board = self.board.copy()

state_copy.player_state = self.player_state.copy()
state_copy.color_to_index = self.color_to_index
state_copy.colors = self.colors # immutable, so no need to copy
state_copy.colors = self.colors # immutable

# TODO: Move Deck to functional code, so as to quick-copy arrays.
state_copy.resource_deck = pickle.loads(pickle.dumps(self.resource_deck))
Expand Down Expand Up @@ -415,7 +412,7 @@ def apply_action(state: State, action: Action):

if number == 7:
discarders = [
player_num_resource_cards(state, color) > DISCARD_LIMIT
player_num_resource_cards(state, color) > state.discard_limit
for color in state.colors
]
is_discarding = any(discarders)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@
# AlphaTan(Color.BLUE, uuid.uuid4(), model, temp=0, num_simulations=10),
AlphaTan(Color.RED, uuid.uuid4(), model, temp=0, num_simulations=10),
]
wins, vp_history = play_batch(10, players, None, save_in_db, False)
wins, vp_history = play_batch(10, players)
4 changes: 1 addition & 3 deletions catanatron_experimental/catanatron_experimental/bayesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ def black_box_function(a, b, c, d, e, f, g, h, i, j, k, l):
ValueFunctionPlayer(Color.RED, "C", params=DEFAULT_WEIGHTS),
ValueFunctionPlayer(Color.BLUE, "C", params=weights),
]
wins, results_by_player = play_batch(
100, players, None, False, False, verbose="ERROR"
)
wins, results_by_player = play_batch(100, players)
vps = results_by_player[players[1].color]
avg_vps = sum(vps) / len(vps)
return 100 * wins[players[1].color] + avg_vps
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import time
import random

from catanatron.state_functions import (
get_longest_road_length,
Expand Down Expand Up @@ -67,17 +68,23 @@ class ValueFunctionPlayer(Player):
For now, the base value function only considers 1 enemy player.
"""

def __init__(self, color, value_fn_builder_name=None, params=None, is_bot=True):
def __init__(
self, color, value_fn_builder_name=None, params=None, is_bot=True, epsilon=None
):
super().__init__(color, is_bot)
self.value_fn_builder_name = (
"contender_fn" if value_fn_builder_name == "C" else "base_fn"
)
self.params = params
self.epsilon = epsilon

def decide(self, game, playable_actions):
if len(playable_actions) == 1:
return playable_actions[0]

if self.epsilon is not None and random.random() < self.epsilon:
return random.choice(playable_actions)

best_value = float("-inf")
best_action = None
for action in playable_actions:
Expand Down Expand Up @@ -155,6 +162,13 @@ def fn(game, p0_color):
params["longest_road"] if num_buildable_nodes == 0 else 0.1
)

# TODO: Do roads harmonize? That is, what is the length of the longest
# _possible_ road? (To try to keep the bot from building extraneous roads.)

# TODO: Weight longest road much lower if in early game.

# TODO: Weight monopoly better later in the game.

return float(
game.state.player_state[f"{key}_VICTORY_POINTS"] * params["public_vps"]
+ production * params["production"]
Expand Down Expand Up @@ -215,6 +229,7 @@ def __init__(
prunning=False,
value_fn_builder_name=None,
params=DEFAULT_WEIGHTS,
epsilon=None,
):
super().__init__(color)
self.depth = int(depth)
Expand All @@ -224,6 +239,7 @@ def __init__(
)
self.params = params
self.use_value_function = None
self.epsilon = epsilon

def value_function(self, game, p0_color):
raise NotImplementedError
Expand All @@ -238,6 +254,9 @@ def decide(self, game: Game, playable_actions):
if len(actions) == 1:
return actions[0]

if self.epsilon is not None and random.random() < self.epsilon:
return random.choice(playable_actions)

start = time.time()
state_id = str(len(game.state.actions))
node = DebugStateNode(state_id, self.color) # i think it comes from outside
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ def objective(trial):
ValueFunctionPlayer(Color.RED, "C", params=DEFAULT_WEIGHTS),
ValueFunctionPlayer(Color.BLUE, "C", params=weights),
]
wins, results_by_player = play_batch(
200, players, None, False, False, loglevel="ERROR"
)
wins, results_by_player = play_batch(200, players)
vps = results_by_player[players[1].color]
avg_vps = sum(vps) / len(vps)
return 1000 * wins[players[1].color] + avg_vps
Expand Down
100 changes: 77 additions & 23 deletions catanatron_experimental/catanatron_experimental/play.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from dataclasses import dataclass

import click
from rich.console import Console
Expand Down Expand Up @@ -87,9 +88,20 @@ def render(self, task):
""",
)
@click.option(
"--quiet/--no-quiet",
"--config-discard-limit",
default=7,
help="Sets Discard Limit to use in games.",
)
@click.option(
"--config-vps-to-win",
default=10,
help="Sets Victory Points needed to win games.",
)
@click.option(
"--quiet",
default=False,
help="Whether to print results to the console",
is_flag=True,
help="Silence console output. Useful for debugging.",
)
@click.option(
"--help-players",
Expand All @@ -98,7 +110,18 @@ def render(self, task):
help="Show player codes and exits.",
is_flag=True,
)
def simulate(num, players, output, json, csv, db, quiet, help_players):
def simulate(
num,
players,
output,
json,
csv,
db,
config_discard_limit,
config_vps_to_win,
quiet,
help_players,
):
"""
Catan Bot Simulator.
Catanatron allows you to simulate millions of games at scale
Expand Down Expand Up @@ -128,7 +151,31 @@ def simulate(num, players, output, json, csv, db, quiet, help_players):
players.append(player)
break

play_batch(num, players, output, json, csv, db, quiet)
output_options = OutputOptions(output, csv, json, db)
game_config = GameConfigOptions(config_discard_limit, config_vps_to_win)
play_batch(
num,
players,
output_options,
game_config,
quiet,
)


@dataclass(frozen=True)
class OutputOptions:
    """Class to keep track of output CLI flags.

    Every field has a default so that ``OutputOptions()`` can be built with
    no arguments; ``play_batch`` does exactly that when it is called without
    output options. Field order matches the positional construction
    ``OutputOptions(output, csv, json, db)`` used by ``simulate``.
    """

    # Path to store files. None (the default) means no output directory was
    # given, in which case play_batch skips the CSV/JSON accumulators.
    output: str = None
    csv: bool = False  # add the CSV accumulator (only when ``output`` is set)
    json: bool = False  # add the JSON accumulator (only when ``output`` is set)
    db: bool = False  # add the database accumulator


@dataclass(frozen=True)
class GameConfigOptions:
    """Class to keep track of game-configuration CLI flags.

    ``play_batch_core`` forwards both fields to ``Game`` as the keyword
    arguments of the same names.
    """

    discard_limit: int = 7  # discard limit to use in games
    vps_to_win: int = 10  # victory points needed to win games


COLOR_TO_RICH_STYLE = {
Expand All @@ -151,38 +198,43 @@ def rich_color(color):
return f"[{style}]{color.value}[/{style}]"


def play_batch_core(num_games, players, accumulators=[]):
def play_batch_core(num_games, players, game_config, accumulators=[]):
for _ in range(num_games):
for player in players:
player.reset_state()
game = Game(players)
game = Game(
players,
discard_limit=game_config.discard_limit,
vps_to_win=game_config.vps_to_win,
)
game.play(accumulators)
yield game


def play_batch(
num_games,
players,
output=None,
json=False,
csv=False,
db=False,
output_options=None,
game_config=None,
quiet=False,
):
output_options = output_options or OutputOptions()
game_config = game_config or GameConfigOptions()

statistics_accumulator = StatisticsAccumulator()
vp_accumulator = VpDistributionAccumulator()
accumulators = [statistics_accumulator, vp_accumulator]
if output:
ensure_dir(output)
if output and csv:
accumulators.append(CsvDataAccumulator(output))
if output and json:
accumulators.append(JsonDataAccumulator(output))
if db:
if output_options.output:
ensure_dir(output_options.output)
if output_options.output and output_options.csv:
accumulators.append(CsvDataAccumulator(output_options.output))
if output_options.output and output_options.json:
accumulators.append(JsonDataAccumulator(output_options.output))
if output_options.db:
accumulators.append(DatabaseAccumulator())

if quiet:
for _ in play_batch_core(num_games, players, accumulators):
for _ in play_batch_core(num_games, players, game_config, accumulators):
pass
return

Expand All @@ -196,7 +248,7 @@ def play_batch(
for player in players:
table.add_column(f"{player.color.value} VP", justify="right")
table.add_column("WINNER")
if db:
if output_options.db:
table.add_column("LINK", overflow="fold")

with Progress(
Expand All @@ -214,7 +266,9 @@ def play_batch(
for player in players
]

for i, game in enumerate(play_batch_core(num_games, players, accumulators)):
for i, game in enumerate(
play_batch_core(num_games, players, game_config, accumulators)
):
winning_color = game.winning_color()

if (num_games - last_n) < (i + 1):
Expand All @@ -228,7 +282,7 @@ def play_batch(
points = get_actual_victory_points(game.state, player.color)
row.append(str(points))
row.append(rich_color(winning_color))
if db:
if output_options.db:
row.append(accumulators[-1].link)

table.add_row(*row)
Expand Down Expand Up @@ -284,8 +338,8 @@ def play_batch(
table.add_row(avg_ticks, avg_turns, avg_duration)
console.print(table)

if output and csv:
console.print(f"GZIP CSVs saved at: [green]{output}[/green]")
if output_options.output and output_options.csv:
console.print(f"GZIP CSVs saved at: [green]{output_options.output}[/green]")

return (
dict(statistics_accumulator.wins),
Expand Down
4 changes: 1 addition & 3 deletions catanatron_experimental/catanatron_experimental/rayopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ def objective(config):
ValueFunctionPlayer(Color.RED, "C", params=DEFAULT_WEIGHTS),
ValueFunctionPlayer(Color.BLUE, "C", params=config),
]
wins, results_by_player = play_batch(
100, players, None, False, False, loglevel="ERROR"
)
wins, results_by_player = play_batch(100, players)
vps = results_by_player[players[1].color]
avg_vps = sum(vps) / len(vps)
score = 100 * wins[players[1].color] + avg_vps
Expand Down
Loading

0 comments on commit a3f0d33

Please sign in to comment.