Skip to content
This repository has been archived by the owner on Jun 16, 2021. It is now read-only.

Commit

Permalink
Implement a policy-weighted random move player
Browse files Browse the repository at this point in the history
  • Loading branch information
brilee committed Jan 24, 2017
1 parent 4e25b00 commit 25ce4b5
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 5 deletions.
4 changes: 3 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import gtp as gtp_lib

from policy import PolicyNetwork
from strategies import RandomPlayer, PolicyNetworkBestMovePlayer, MCTS
from strategies import RandomPlayer, PolicyNetworkBestMovePlayer, PolicyNetworkRandomMovePlayer, MCTS
from load_data_sets import process_raw_data, DataSet

TRAINING_CHUNK_RE = re.compile(r"train\d+\.chunk.gz")
Expand All @@ -18,6 +18,8 @@ def gtp(strategy, read_file=None):
instance = RandomPlayer()
elif strategy == 'policy':
instance = PolicyNetworkBestMovePlayer(n, read_file)
elif strategy == 'randompolicy':
instance = PolicyNetworkRandomMovePlayer(n, read_file)
elif strategy == 'mcts':
instance = MCTS(n, read_file)
else:
Expand Down
51 changes: 47 additions & 4 deletions strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,30 @@ def translate_gtp_colors(gtp_color):
def is_move_reasonable(position, move):
    """Return True if *move* is legal and does not fill one of our own eyes."""
    if not position.is_move_legal(move):
        return False
    # Playing into your own eye is legal but almost always self-destructive.
    return go.is_eyeish(position.board, move) != position.to_play

def select_most_likely(position, move_probabilities):
    """Return the highest-probability reasonable move, or None if none qualifies.

    Moves are considered in descending probability order; the first one that
    is legal and not an own-eye fill wins.
    """
    reasonable = (candidate for candidate in sorted_moves(move_probabilities)
                  if is_move_reasonable(position, candidate))
    return next(reasonable, None)

def select_weighted_random(position, move_probabilities):
    """Draw a move at random, weighted by the policy network's probabilities.

    Performs inverse-CDF sampling over the probability array: draw a uniform
    number in [0, 1) and walk the cumulative sum until it exceeds the draw.
    If the sampled move is unreasonable (illegal or an own-eye fill), or if
    floating-point rounding makes the probabilities sum to slightly less
    than 1 so that no move gets selected, fall back to the most likely
    reasonable move.
    """
    selection = random.random()
    selected_move = None
    current_probability = 0
    # np.ndenumerate walks the array in index order; the cumulative sum
    # crosses `selection` exactly once, so order does not bias the draw.
    for move, move_prob in np.ndenumerate(move_probabilities):
        current_probability += move_prob
        if current_probability > selection:
            selected_move = move
            break
    # selected_move stays None when rounding error leaves the cumulative
    # sum below `selection` — treat that like an unreasonable pick.
    if selected_move is not None and is_move_reasonable(position, selected_move):
        return selected_move
    # Fallback in case the selected move was illegal or nothing was drawn.
    return select_most_likely(position, move_probabilities)


class GtpInterface(object):
def __init__(self):
self.size = 9
Expand Down Expand Up @@ -89,10 +113,29 @@ def suggest_move(self, position):
# Pass if the opponent passes
return None
move_probabilities = self.policy_network.run(position)
for move in sorted_moves(move_probabilities):
if is_move_reasonable(position, move):
return move
return None
return select_most_likely(position, move_probabilities)

class PolicyNetworkRandomMovePlayer(GtpInterface):
    """GTP player that samples its move from the policy network's distribution.

    Unlike PolicyNetworkBestMovePlayer, which always plays the single most
    likely move, this player draws a policy-weighted random move, giving
    more varied (exploratory) play.
    """

    def __init__(self, policy_network, read_file):
        # policy_network: network whose .run(position) yields move probabilities.
        # read_file: checkpoint path used to (re)load network weights.
        self.policy_network = policy_network
        self.read_file = read_file
        super().__init__()

    def clear(self):
        """Reset game state and reload the latest network weights."""
        super().clear()
        self.refresh_network()

    def refresh_network(self):
        # Ensure that the player is using the latest version of the network
        # so that the network can be continually trained even as it's playing.
        self.policy_network.initialize_variables(self.read_file)

    def suggest_move(self, position):
        """Return a policy-weighted random move, or None to pass.

        Passes when the opponent just passed and the game is past move 100,
        so finished games actually end.
        """
        # `is None` is the correct identity check for the None singleton
        # (the original used `== None`).
        if position.recent and position.n > 100 and position.recent[-1].move is None:
            # Pass if the opponent passes
            return None
        move_probabilities = self.policy_network.run(position)
        return select_weighted_random(position, move_probabilities)

# Exploration constant
c_PUCT = 5
Expand Down

0 comments on commit 25ce4b5

Please sign in to comment.