Switch to GTP interface instead of homebaked controller

1 parent 9b80272 · commit 21125884ebb93e387d9822e10f18a326c61a53dd · @brilee committed Jun 21, 2016
@@ -8,3 +8,12 @@ The logic / control flow of AlphaGo itself is not very complicated and is replic
(As I understand it) AlphaGo uses three neural networks during play. The first NN is a slow but accurate policy network: trained to predict human moves (~57% accuracy), it outputs a list of plausible moves with a probability attached to each, and is used to seed the Monte Carlo tree search. This first NN is slow partly because of its size and partly because its inputs are various computed properties of the Go board (liberty counts; ataris; ladders; etc.). The second NN is a smaller, faster, but less accurate (~24% accuracy) policy network that doesn't use computed properties as input. Once a leaf node of the current MCTS tree is reached, this second, faster network is used to play the position out to the end with vaguely plausible moves and score the end position. The third NN is a value network: it outputs an expected win margin for that board without attempting to play anything out. The results of the Monte Carlo playout using NN #2 and the value calculation using NN #3 are averaged, and this value is recorded as the approximate result for that MCTS node.

Using the priors from NN #1 and the accumulating results of MCTS, a new path is chosen for further Monte Carlo exploration.
+
+Playing with/against MuGo
+=========================
+MuGo uses the GTP protocol, so you can use any GTP-compliant program with it.
+
+For example, to play against MuGo using GoGui, you can run:
+```
+gogui-twogtp -black 'python main.py random' -white 'gogui-display' -size 9 -komi 7.5 -verbose -auto
+```
@@ -1,70 +0,0 @@
-from collections import Counter
-from strategies import AVAILABLE_STRATEGIES
-import tictactoe
-import go
-
-AVAILABLE_GAMES = {
-    "tictactoe": tictactoe.EMPTY_BOARD,
-    "go": go.Position.initial_state(),
-}
-
-def choose_game():
-    choice = None
-    while choice is None:
-        user_input = input("Choose from available games: " + ', '.join(AVAILABLE_GAMES) + '\n')
-        if user_input in AVAILABLE_GAMES:
-            choice = AVAILABLE_GAMES[user_input]
-        else:
-            print("Not a valid choice!")
-    return choice
-
-def choose_strategy():
-    choice = None
-    while choice is None:
-        user_input = input("Choose from available strategies: " + ', '.join(AVAILABLE_STRATEGIES) + '\n')
-        if user_input in AVAILABLE_STRATEGIES:
-            choice = AVAILABLE_STRATEGIES[user_input]
-        else:
-            print("Not a valid choice!")
-    return choice
-
-def play_game(strategy1, strategy2, board, verbose=False):
-    while not (board.player1wins or board.player2wins):
-        if verbose: print(board)
-        if not board.possible_moves():
-            break
-        if board.player1turn:
-            move = strategy1.suggest_move(board)
-        else:
-            move = strategy2.suggest_move(board)
-        if verbose: print("Player {} played {}".format('1' if board.player1turn else '2', move))
-        if verbose: print()
-        board = board.update(move)
-        assert not (board.player1wins and board.player2wins), "Uhoh, both players won somehow %s" % board
-
-    if board.player1wins:
-        return True
-    elif board.player2wins:
-        return False
-    else:
-        return None # A draw.
-
-def run_many(strategy1, strategy2, board, num_trials=1000):
-    results = [play_game(strategy1, strategy2, board, verbose=False)
-               for i in range(num_trials)]
-    return Counter(results)
-
-if __name__ == '__main__':
-    game = choose_game()
-    strategy1 = choose_strategy()
-    strategy2 = choose_strategy()
-    result = play_game(strategy1, strategy2, game, verbose=True)
-    if result is True:
-        print("Player 1 wins!")
-    elif result is False:
-        print("Player 2 wins!")
-    else:
-        print("Draw!")
-
-
-
@@ -204,7 +204,7 @@ def possible_moves(self):
        return [c for c in ALL_COORDS if self.board[c] == '.' and not is_likely_eye(self.board, c)]

    def update(self, input):
-        return self.play_move('B' if self.player1turn else 'W', parse_coords(input))
+        return self.play_move(parse_coords(input))

    def __str__(self):
        if self.ko is not None:
@@ -242,7 +242,7 @@ def pass_move(self):
            player1turn=not self.player1turn,
        )

-    def play_move(self, color, c):
+    def play_move(self, c):
        # Obeys CGOS Rules of Play. In short:
        # No suicides
        # Chinese/area scoring
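The dropped `color` parameter above is the heart of this commit: a `Position` already knows whose turn it is through its `player1turn` field, so callers no longer name the color on every move. A minimal before/after sketch, assuming `parse_coords` is a module-level helper in `go.py` (the coordinate string `'C3'` is hypothetical; the format it accepts isn't shown in this diff):

```python
import go

pos = go.Position.initial_state()
move = go.parse_coords('C3')  # 'C3' is a hypothetical coordinate string

# Before: the caller chose the color on every call.
#     pos = pos.play_move('B' if pos.player1turn else 'W', move)
# After: the position's own player1turn flag implies it.
pos = pos.play_move(move)
```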
@@ -57,9 +57,9 @@ def get_next_move(node):
def handle_play_stones(pos, node):
    props = node.properties
    if 'W' in props:
-        pos = pos.play_move('W', pc(props['W'][0]))
+        pos = pos.play_move(pc(props['W'][0]))
    elif 'B' in props:
-        pos = pos.play_move('B', pc(props['B'][0]))
+        pos = pos.play_move(pc(props['B'][0]))
    next_player, _ = get_next_move(node)
    if next_player == 'W' and pos.player1turn:
        pos = pos._replace(player1turn=False)
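Note the last three lines of this hunk: when the SGF's next player disagrees with `pos.player1turn` (handicap games, setup stones), the replayer flips the flag instead of rejecting the move. The new `GtpInterface.accomodate_out_of_turn` below applies the same idea to GTP `play` commands. Distilled into a standalone helper (hypothetical, not part of the diff):

```python
def force_to_move(pos, color):
    """Flip pos.player1turn, if needed, so that `color` ('B' or 'W') is to move."""
    if (color == 'B') != pos.player1turn:
        pos = pos._replace(player1turn=not pos.player1turn)
    return pos
```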
@@ -1,118 +1,61 @@
import random

-def DEFAULT_VALUE_FUNC(board):
-    if board.player1wins: return 1
-    if board.player2wins: return -1
-    return 0
-
-
-class BaseStrategy(object):
-    '''
-    Takes in a board implementing the following interface
-    class Board:
-        possible_moves(self) => list(Move)
-        update(self, Move) => Board | None (for invalid move)
-        @property player1turn(self) => bool
-        @property player1wins(self) => bool
-        @property player2wins(self) => bool
-    '''
-    def __init__(self, value_f=DEFAULT_VALUE_FUNC):
-        'Override the default value function to get better evaluations'
-        self.value = value_f
-
-    def suggest_move(self, board):
-        if not board.possible_moves():
-            return None
-        return self._suggest_move(board)
-
-    def _suggest_move(self, board):
-        '''
-        Given a board position, suggest a move.
-        '''
+import gtp
+
+import go
+
+def parse_pygtp_coords(t):
+    'Interprets coords in the format (1, 1), with (1,1) being the bottom left'
+    if t == (0, 0):
+        return None
+    rows_from_top = go.N - t[1]
+    return go.W + go.W * rows_from_top + t[0] - 1
+
+def unparse_pygtp_coords(c):
+    if c is None:
+        return (0, 0)
+    c = c - go.W
+    row, column = divmod(c, go.W)
+    return column + 1, go.N - row
+
+class GtpInterface(object):
+    def __init__(self):
+        self.size = 9
+        self.position = None
+        self.komi = 6.5
+        self.clear()
+
+    def set_size(self, n):
+        self.size = n
+        go.set_board_size(n)
+        self.clear()
+
+    def set_komi(self, komi):
+        self.komi = komi
+        self.position = self.position._replace(komi=komi)
+
+    def clear(self):
+        self.position = go.Position.initial_state()._replace(komi=self.komi)
+
+    def accomodate_out_of_turn(self, color):
+        player1turn = (color == gtp.BLACK)
+        if player1turn != self.position.player1turn:
+            self.position = self.position._replace(player1turn=not self.position.player1turn)
+
+    def make_move(self, color, vertex):
+        coords = parse_pygtp_coords(vertex)
+        self.accomodate_out_of_turn(color)
+        self.position = self.position.play_move(coords)
+        return self.position is not None
+
+    def get_move(self, color):
+        self.accomodate_out_of_turn(color)
+        move = self.suggest_move(self.position)
+        return unparse_pygtp_coords(move)
+
+    def suggest_move(self, position):
        raise NotImplementedError

-class InteractivePlayer(BaseStrategy):
-    def _suggest_move(self, board):
-        while True:
-            player_input = input("It's your turn! Play a move.\n")
-            new_board = board.update(player_input)
-            if new_board is None:
-                print("Invalid move")
-            else:
-                return player_input
-
-class RandomPlayer(BaseStrategy):
-    def _suggest_move(self, board):
-        return random.choice(board.possible_moves())
-
-class OneMoveLookahead(BaseStrategy):
-    def _suggest_move(self, board):
-        moves = board.possible_moves()
-        strategy = max if board.player1turn else min
-        moves_with_valuation = [
-            (self.value(board.update(move)), move)
-            for move in moves
-        ]
-        return strategy(moves_with_valuation)[1]
-
-class MinMaxPlayer(BaseStrategy):
-    def _suggest_move(self, board, MAX_DEPTH=4):
-        moves = board.possible_moves()
-        random.shuffle(moves)
-        strategy = max if board.player1turn else min
-        moves_with_valuation = [
-            (self.minimax(board.update(move), MAX_DEPTH), move)
-            for move in moves
-        ]
-        return strategy(moves_with_valuation)[1]
-
-    def minimax(self, board, depth):
-        if depth == 0 or board.player1wins or board.player2wins:
-            return self.value(board)
-
-        possible_moves = board.possible_moves()
-        if not possible_moves:
-            return self.value(board)
-
-        strategy = max if board.player1turn else min
-        moves_with_valuation = [
-            (self.minimax(board.update(move), depth-1), move)
-            for move in board.possible_moves()
-        ]
-        return strategy(moves_with_valuation)[0]
-
-class NegamaxABPlayer(BaseStrategy):
-    def _suggest_move(self, board, MAX_DEPTH=6):
-        moves = board.possible_moves()
-        random.shuffle(moves)
-        moves_with_valuation = [
-            (-self.negamax(board.update(move), float('-inf'), float('inf'), MAX_DEPTH), move)
-            for move in moves
-        ]
-        return max(moves_with_valuation)[1]
-
-    def negamax(self, board, alpha, beta, depth):
-        inverted = 1 if board.player1turn else -1
-        if depth == 0 or board.player1wins or board.player2wins:
-            return inverted * self.value(board)
-
-        moves = board.possible_moves()
-        best_value_sofar = float('-inf')
-        for move in moves:
-            value = -self.negamax(board.update(move), -beta, -alpha, depth-1)
-            best_value_sofar = max(value, best_value_sofar)
-            alpha = max(alpha, value)
-            if alpha > beta:
-                break
-        return best_value_sofar
-
-class MCTS(BaseStrategy):
-    pass
-
-AVAILABLE_STRATEGIES = {
-    'interactive': InteractivePlayer(),
-    'random': RandomPlayer(),
-    'onemove-lookahead': OneMoveLookahead(),
-    'minimax': MinMaxPlayer(),
-    'minimax-optimized': NegamaxABPlayer()
-}
+class RandomPlayer(GtpInterface):
+    def suggest_move(self, position):
+        return random.choice(position.possible_moves())
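Two notes on the new strategies module. The coordinate helpers at the top exist because pygtp hands vertices to `make_move`/`get_move` as `(column, row)` pairs with `(1, 1)` at the bottom left (and `(0, 0)` meaning pass), while `Position` wants flat indices into its padded board array. And with the homebaked controller gone, the driver reduces to a loop feeding GTP commands to an engine that wraps one of these players. The sketch below is a guess at what `main.py` looks like after this commit; it assumes pygtp's `gtp.Engine`, its `send` method, and its `disconnect` flag, so treat the exact API as an assumption.

```python
import sys

import gtp  # the pygtp package; also supplies the BLACK/WHITE constants used above

from strategies import RandomPlayer

# pygtp's Engine drives any object exposing set_size / set_komi / clear /
# make_move / get_move -- exactly the surface GtpInterface implements.
engine = gtp.Engine(RandomPlayer())

while not engine.disconnect:  # assumed flag, set once a GTP "quit" arrives
    command = sys.stdin.readline().strip()
    if command:
        sys.stdout.write(engine.send(command))
        sys.stdout.flush()
```

This is the process that the `gogui-twogtp -black 'python main.py random' ...` line in the README launches and talks to over stdin/stdout.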
@@ -1,32 +0,0 @@
-import unittest
-from tictactoe import ALL_POSITIONS, load_board
-
-class TestTicTacToe(unittest.TestCase):
-    def test_possible_moves(self):
-        t = load_board('___ ___ ___', True)
-        self.assertEqual(t.possible_moves(), ALL_POSITIONS)
-
-        t = load_board('XXX ___ ___', True)
-        self.assertEqual(t.possible_moves(), 'b1 b2 b3 c1 c2 c3'.split())
-
-    def test_win_conditions(self):
-        t = load_board('_X_ OXO _XO', True)
-        self.assertTrue(t.player1wins)
-        self.assertFalse(t.player2wins)
-
-        t = load_board('XXX OOO ___', True)
-        self.assertTrue(t.player1wins)
-        self.assertTrue(t.player2wins)
-
-        t = load_board('OOO _XX X__', True)
-        self.assertFalse(t.player1wins)
-        self.assertTrue(t.player2wins)
-
-    def test_updates(self):
-        t = load_board('___ ___ ___', True)
-        expected_board = load_board('X__ ___ ___', False)
-        new_board = t.update('a1')
-        self.assertEqual(new_board, expected_board)
-
-        t = load_board('XXX ___ ___', True)
-        self.assertEqual(t.update('a1'), None)
@@ -1,51 +0,0 @@
-from collections import namedtuple
-'''
-Tic tac toe board format: a namedtuple with properties
-a1, a2, a3, b1, b2, b3, c1, c2, c3, player1turn.
-The positional properties have values [True, False, None],
-with True meaning player1; False meaning player2.
-
-A move is represented as a string a1~c3.
-'''
-
-ALL_POSITIONS = 'a1 a2 a3 b1 b2 b3 c1 c2 c3'.split()
-WINNING_LINES = [s.split() for s in (
-    'a1 a2 a3',
-    'b1 b2 b3',
-    'c1 c2 c3',
-    'a1 b1 c1',
-    'a2 b2 c2',
-    'a3 b3 c3',
-    'a1 b2 c3',
-    'a3 b2 c1',
-)]
-
-
-class TicTacToe(namedtuple('TicTacToe', ALL_POSITIONS + ['player1turn'])):
-    def possible_moves(self):
-        return [pos for pos in ALL_POSITIONS if getattr(self, pos) is None]
-
-    @property
-    def player1wins(self):
-        return any(all(getattr(self, pos) is True for pos in line) for line in WINNING_LINES)
-
-    @property
-    def player2wins(self):
-        return any(all(getattr(self, pos) is False for pos in line) for line in WINNING_LINES)
-
-    def update(self, move):
-        if getattr(self, move) is not None:
-            return None
-        return self._replace(**{move: self.player1turn, 'player1turn': not self.player1turn})
-
-    def __repr__(self):
-        char_map = {True: 'X', False: 'O', None: '_'}
-        return "{}{}{}\n{}{}{}\n{}{}{} to play: {}".format(*(char_map[k] for k in self))
-
-def load_board(s, player1turn):
-    'X = player1, O = player2'
-    char_map = {'X': True, 'O': False, '_': None}
-    s = [char_map[char] for char in s if char in char_map]
-    return TicTacToe(*(s + [player1turn]))
-
-EMPTY_BOARD = load_board('___ ___ ___', True)
