In [2]:
import numpy as np
import datetime

The board is represented by a 3x3 matrix with components -1, 0 or 1: -1 (1) marks a square taken by player -1 (1), and 0 marks an empty square. The board is stored in `self.board` at all times.

A state is the same as the board, with one main difference: -1 (1) corresponds to the mark of the current (opponent) player.

A play is a 3x3 matrix of zeros except for one non-zero component, which marks the square being played (the code handles it flattened into a length-9 vector).

In [18]:
class Board(object):
    """Tic-tac-toe rules operating on flat, length-9 NumPy state vectors.

    Squares hold -1, 0 or 1, where 0 is an empty square. States and plays
    are handled flattened (row-major); ``self.board`` keeps the 3x3 form.
    """

    def start(self):
        """
        Resets ``self.board`` to an empty 3x3 board and returns the initial
        state as a flat array of nine zeros.
        """
        # The shape must be a tuple: np.zeros(3, 3) would treat the second
        # 3 as a dtype and raise TypeError.
        self.board = np.zeros((3, 3))
        return self.board.flatten()

    def current_player(self, state):
        """
        Gets the current player number.

        Returns -1 when an even number of marks has been placed on ``state``
        and 1 otherwise.
        """
        marks_placed = np.sum(np.abs(state))
        return -1 if marks_placed % 2 == 0 else 1

    def next_state(self, state, play):
        """
        Takes the game state, and the move to be applied, returns the new game state.

        The result is a new array; ``state`` and ``play`` are not modified.
        """
        # NOTE(review): the state is not re-oriented for the next player
        # here -- confirm whether callers are expected to do that.
        return state + play

    def legal_plays(self, state):
        """
        Takes the game state and returns the possible legal plays.

        Returns a list with one flat array per empty square, in increasing
        square order; each array is all zeros except for a -1 on that square.
        """
        flat = np.asarray(state).ravel()
        plays = []
        for square in np.flatnonzero(flat == 0):
            # Allocate a fresh array per play: slicing a NumPy array yields
            # a view, so reusing one template would make every play share
            # (and overwrite) the same buffer.
            play = np.zeros(flat.size)
            play[square] = -1
            plays.append(play)
        return plays

    def winner(self, state):
        """
        Reports the outcome encoded in ``state`` (any array reshapeable to 3x3).

        Returns 1 if the 1-marks complete a line, -1 if the -1-marks do,
        0 if no line is complete and empty squares remain (game ongoing),
        and 2 if the board is full with no complete line (draw).
        """
        matrix = np.asarray(state).reshape(3, 3)
        # Sums over every winning line: 3 columns, 3 rows and both diagonals.
        line_sums = np.concatenate((
            matrix.sum(axis=0),
            matrix.sum(axis=1),
            [np.trace(matrix), np.trace(np.fliplr(matrix))],
        ))
        if np.any(line_sums == 3):
            # player 1 wins
            return 1
        if np.any(line_sums == -3):
            # player -1 wins
            return -1
        if np.any(matrix == 0):
            # game still ongoing
            return 0
        # game is a draw
        return 2

In [None]:
class MonteCarlo(object):
    """Time-boxed random-playout driver for a board object.

    ``board`` must provide ``legal_plays(state)``, ``next_state(state, play)``
    and ``winner(state)``; ``winner`` must return a falsy value while the
    game is still ongoing.
    """

    def __init__(self, board, time=2, max_moves=5):
        """
        Stores the board and the search limits.

        ``time`` is the number of seconds ``get_play`` keeps simulating;
        ``max_moves`` caps the number of plays made in one simulation.
        """
        self.board = board
        self.states = []
        self.calculation_time = datetime.timedelta(seconds=time)
        self.max_moves = max_moves

    def update(self, state):
        """
        Appends ``state`` to the recorded game history.
        """
        self.states.append(state)

    def get_play(self):
        """
        Runs simulations from the latest recorded state until the
        calculation time has elapsed.

        Raises IndexError (from ``run_simulation``) if no state has been
        recorded with ``update`` yet.
        """
        begin = datetime.datetime.utcnow()
        while datetime.datetime.utcnow() - begin < self.calculation_time:
            self.run_simulation()
        # TODO: calculate the best move from the current game state and
        # return it. No statistics are collected yet, so this method
        # currently returns None.

    def run_simulation(self):
        """
        Plays out a random game of at most ``max_moves`` plays, starting
        from the latest recorded state. ``self.states`` is left untouched.

        Raises IndexError if no state has been recorded yet.
        """
        # Local import so the method does not depend on names imported
        # elsewhere in the file.
        from random import choice

        state = self.states[-1]

        for _ in range(self.max_moves):
            legal = self.board.legal_plays(state)
            if not legal:
                # Nothing left to play from this position.
                break

            # play to be replaced for the recommended
            play = choice(legal)
            state = self.board.next_state(state, play)

            # Ask about the position just reached, not the whole history.
            if self.board.winner(state):
                break
        # TODO: update the statistics tables with the result.