In [1]:
from __future__ import print_function
import pickle
from game import Board, Game
from mcts_pure import MCTSPlayer as MCTS_Pure
from mcts_alphaZero import MCTSPlayer
from policy_value_net_numpy import PolicyValueNetNumpy

In [2]:
class Human(object):
    """
    human player

    Reads moves typed on standard input as two comma-separated integers
    (for example ``2,3``) and converts them with ``board.location_to_move``.
    """

    def __init__(self):
        # Player index; stays None until set_player_ind() is called.
        self.player = None

    def set_player_ind(self, p):
        """Store the index assigned to this player."""
        self.player = p

    def get_action(self, board):
        """Prompt until the user enters a move that is currently available.

        Args:
            board: object exposing ``location_to_move(location)`` and an
                ``availables`` container of legal moves.

        Returns:
            The move value produced by ``board.location_to_move`` for the
            first input that is not -1 and is contained in
            ``board.availables``.

        Raises:
            EOFError: if the input stream is closed, since no valid move can
                ever be read after that.
        """
        # Loop instead of recursing so an arbitrary number of invalid inputs
        # cannot exhaust the interpreter's recursion limit.
        while True:
            try:
                location = input("Your move: ")
                if isinstance(location, str):  # for python3
                    location = [int(n, 10) for n in location.split(",")]
                move = board.location_to_move(location)
            except EOFError:
                # A closed stdin would otherwise be reported as "invalid move"
                # forever; surface it to the caller instead.
                raise
            except Exception:
                # Anything unparsable is treated as an invalid move.
                move = -1
            if move != -1 and move in board.availables:
                return move
            print("invalid move")

    def __str__(self):
        return "Human {}".format(self.player)


In [3]:
    # Board dimensions, the n_in_row value handed to Board, and the pickled
    # model file whose parameters are loaded below.
    n = 5
    width, height = 8, 8
    model_file = 'best_policy_8_8_5.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        # load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow

        # best_policy = PolicyValueNet(width, height, model_file = model_file)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # load the provided model (trained in Theano/Lasagne) into a MCTS player written in pure numpy
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # model files that come from a trusted source.
        try:
            # `with` closes the file even when unpickling fails.
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh)
        except Exception:
            # Retry with encoding='bytes' (To support python3). Catching
            # Exception rather than using a bare except keeps Ctrl-C flowing
            # to the KeyboardInterrupt handler at the bottom of this cell.
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh, encoding='bytes')
        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)  # set larger n_playout for better performance

        # uncomment the following line to play with pure MCTS (it's much weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()

        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        # Interrupting the kernel / Ctrl-C ends the game quietly.
        print('\n\rquit')

Player 1 with X
Player 2 with O

       0       1       2       3       4       5       6       7

   7   _       _       _       _       _       _       _       _    


   6   _       _       _       _       _       _       _       _    


   5   _       _       _       _       _       _       _       _    


   4   _       _       _       _       _       _       _       _    


   3   _       _       _       _       _       _       _       _    


   2   _       _       _       _       _       _       _       _    


   1   _       _       _       _       _       _       _       _    


   0   _       _       _       _       _       _       _       _    


Player 1 with X
Player 2 with O

       0       1       2       3       4       5       6       7

   7   _       _       _       _       _       _       _       _    


   6   _       _       _       _       _       _       _       _    


   5   _       _       _       _       _       _       _       _    


   4   _       _     

In [4]:
# Dump the parameters unpickled from model_file in the game-setup cell.
# The recorded output below shows a list whose entries are numpy arrays; the
# full dump is very long, so expect the output to be truncated.
print(policy_param)

[array([[[[ 3.00282031e-01,  7.93916807e-02,  1.04310252e-01],
         [-2.32803449e-02,  4.39492881e-01, -1.46985695e-01],
         [ 7.46263936e-02,  1.61980420e-01,  5.00340879e-01]],

        [[-1.50764182e-01,  9.13940221e-02, -6.68839067e-02],
         [-1.11774810e-01,  2.15447694e-01,  3.71383615e-02],
         [ 2.60537509e-02,  1.37014072e-02,  5.10720573e-02]],

        [[-6.64655939e-02, -8.79024416e-02,  9.44483057e-02],
         [-1.87298078e-02, -2.23822589e-03,  3.32160555e-02],
         [-3.61888446e-02, -4.40173298e-02, -5.63976282e-05]],

        [[ 5.94474003e-02, -9.88958105e-02, -2.14816071e-02],
         [-2.01959927e-02,  2.27627177e-02,  1.19268738e-01],
         [ 2.24035811e-02, -2.85499338e-02, -8.97876024e-02]]],


       [[[ 1.74412161e-01, -2.87367523e-01, -3.80795151e-02],
         [-1.11669218e-02, -1.62341457e-03,  2.17930842e-02],
         [ 7.55131468e-02,  1.05582893e-01, -1.08373843e-01]],

        [[ 1.31837398e-01,  9.13798437e-02,  6.94752336e-

In [12]:
# Number of entries in the loaded policy_param list (16 in the recorded output).
print(len(policy_param))

16
