In [1]:
import time, math, random
from copy import deepcopy

# Implementations

## [mcts](https://github.com/pbsinclair42/MCTS/) (modified)

Incomplete; not recommended for use.

In [53]:
def randomPolicy(state):
    """Play uniformly random actions from ``state`` until the game ends.

    Returns whatever ``getReward()`` yields on the terminal state that is
    reached. Raises ``Exception`` if a state reports itself as non-terminal
    while offering no actions to choose from.
    """
    current = state
    while True:
        if current.isTerminal():
            return current.getReward()
        try:
            chosen = random.choice(current.getPossibleActions())
        except IndexError:
            # random.choice raises IndexError on an empty sequence.
            raise Exception("Non-terminal state has no possible actions: " + str(current))
        current = current.takeAction(chosen)


class Node():
    """One node of the search tree, wrapping a game state."""

    def __init__(self, state, parent):
        # Query the state once; a terminal node has nothing left to expand,
        # so it starts out marked as fully expanded.
        terminal = state.isTerminal()

        self.state = state
        self.parent = parent          # None for the root
        self.children = {}            # action -> child Node
        self.isTerminal = terminal
        self.isFullyExpanded = terminal
        self.numVisits = 0
        self.totalReward = 0


class MCTS():
    """Monte Carlo tree search driver.

    A search repeatedly selects a node, runs a rollout from it and propagates
    the rollout reward back to the root, until the time limit (in seconds)
    and/or the iteration limit is exhausted.
    """

    def __init__(self, timeLimit=None, iterationLimit=None, explorationConstant=1 / math.sqrt(2),
                 rolloutPolicy=randomPolicy):
        """Store the search budget and policies.

        timeLimit:           wall-clock budget per search, in seconds (None = no time limit).
        iterationLimit:      maximum number of rounds per search (None = no round limit).
        explorationConstant: weight of the exploration term during selection.
        rolloutPolicy:       callable taking a state and returning a reward.
        """
        self.timeLimit = timeLimit
        self.searchLimit = iterationLimit
        self.explorationConstant = explorationConstant
        self.rollout = rolloutPolicy

    def search(self, initialState):
        """Run a search from ``initialState`` and return the chosen action.

        Raises ValueError if neither a time limit nor an iteration limit is
        set (the search would never stop). Raises IndexError (from
        ``random.choice``) if the root ends up with no children, e.g. when
        ``initialState`` is terminal or no round was executed.
        """
        if self.timeLimit is None and self.searchLimit is None:
            raise ValueError("Must have either a time limit or an iteration limit")

        self.root = Node(initialState, None)

        startTime = time.time()
        rounds = 0
        while (self.timeLimit is None or time.time() < startTime + self.timeLimit) and \
              (self.searchLimit is None or rounds < self.searchLimit):
            self.executeRound()
            # Count the round so that the iteration limit is actually honoured.
            rounds += 1

        # Exploration weight 0: pick purely on average reward.
        bestChild = self.getBestChild(self.root, 0)
        return self.getAction(self.root, bestChild)

    def executeRound(self):
        """Perform one select -> rollout -> backpropagate cycle."""
        node = self.selectNode(self.root)
        reward = self.rollout(node.state)
        self.backpropogate(node, reward)

    def selectNode(self, node):
        """Descend from ``node`` to a newly expanded child or a terminal node."""
        while not node.isTerminal:
            if node.isFullyExpanded:
                node = self.getBestChild(node, self.explorationConstant)
            else:
                return self.expand(node)
        return node

    def expand(self, node):
        """Create and return the first child of ``node`` that does not exist yet.

        Marks ``node`` as fully expanded once every possible action has a child.
        Raises Exception if there is nothing to expand, which means a
        non-terminal state offered no (new) actions.
        """
        actions = node.state.getPossibleActions()
        for action in actions:
            if action not in node.children:
                newNode = Node(node.state.takeAction(action), node)
                node.children[action] = newNode
                if len(actions) == len(node.children):
                    node.isFullyExpanded = True
                return newNode
        # Falling through used to return None and crash later on `node.state`.
        raise Exception("Non-terminal state has no unexpanded actions: " + str(node.state))

    def backpropogate(self, node, reward):
        """Add ``reward`` and one visit to ``node`` and each of its ancestors."""
        while node is not None:
            node.numVisits += 1
            node.totalReward += reward
            node = node.parent

    def getBestChild(self, node, explorationValue):
        """Return a child of ``node`` with the highest UCB-style score.

        score = mean reward + explorationValue * sqrt(2 * ln(parent visits) / child visits)
        Ties are broken uniformly at random.

        NOTE(review): the mean reward is not sign-adjusted for the player to
        move at ``node``. If rewards are expressed from one fixed player's
        point of view, the opponent's choices are scored as if they wanted
        that same player to win -- confirm whether this is intended.
        """
        bestValue = float("-inf")
        bestNodes = []
        for child in node.children.values():
            nodeValue = child.totalReward / child.numVisits + explorationValue * math.sqrt(
                2 * math.log(node.numVisits) / child.numVisits)
            if nodeValue > bestValue:
                bestValue = nodeValue
                bestNodes = [child]
            elif nodeValue == bestValue:
                bestNodes.append(child)
        return random.choice(bestNodes)

    def getAction(self, root, bestChild):
        """Return the action under which ``bestChild`` is stored in ``root.children``.

        Returns None if ``bestChild`` is not a child of ``root``.
        """
        for action, node in root.children.items():
            if node is bestChild:
                return action
        return None

In [None]:
import operator

class NaughtsAndCrossesState(object):
    """Naughts-and-crosses (tic-tac-toe) position.

    ``board`` is a 3x3 list of lists whose cells hold 0 (empty) or the value
    of the player who marked them; players are 1 and -1, and 1 moves first.
    States are treated as immutable: ``takeAction`` returns a modified copy.
    """

    def __init__(self):
        self.board = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
        self.currentPlayer = 1

    def getPossibleActions(self):
        """Return one Action for the current player per empty cell, row by row."""
        return [Action(player=self.currentPlayer, x=i, y=j)
                for i, row in enumerate(self.board)
                for j, cell in enumerate(row)
                if cell == 0]

    def takeAction(self, action):
        """Return a new state with ``action`` applied and the turn passed on.

        The receiver is left untouched. The target cell is not checked for
        being empty.
        """
        newState = deepcopy(self)
        newState.board[action.x][action.y] = action.player
        newState.currentPlayer = self.currentPlayer * -1
        return newState

    def _winningLineSum(self):
        """Return the cell sum of the first completed line, or 0 if there is none.

        Lines are checked in the order rows, columns, main diagonal,
        anti-diagonal. A line is complete when all of its cells carry the
        same player's mark, i.e. its absolute sum equals the board size.
        """
        size = len(self.board)
        lines = list(self.board)
        lines.extend(zip(*self.board))
        lines.append([self.board[i][i] for i in range(size)])
        lines.append([self.board[i][size - i - 1] for i in range(size)])
        for line in lines:
            total = sum(line)
            if abs(total) == size:
                return total
        return 0

    def isTerminal(self):
        """Return True if a line is complete or no empty cell remains."""
        if self._winningLineSum() != 0:
            return True
        return all(cell for row in self.board for cell in row)

    def getReward(self):
        """Return 1.0 if player 1 completed a line, -1.0 if player -1 did, else 0.

        The value is always numeric (previously a draw yielded the bool False).
        """
        total = self._winningLineSum()
        if total != 0:
            return total / len(self.board)
        return 0


class Action():
    """A move: ``player`` marks the cell at row ``x``, column ``y``.

    Instances compare and hash by (x, y, player), so they can serve as
    dictionary keys.
    """

    def __init__(self, player, x, y):
        self.x = x
        self.y = y
        self.player = player

    def __str__(self):
        # Same text as str((x, y)): only the coordinates, not the player.
        return f"({self.x!r}, {self.y!r})"

    def __repr__(self):
        return str(self)

    def __eq__(self, other):
        if self.__class__ != other.__class__:
            return False
        return self.x == other.x and self.y == other.y and self.player == other.player

    def __hash__(self):
        key = (self.x, self.y, self.player)
        return hash(key)

In [55]:
# Self-play demo: one searcher picks the move for whichever side is to play,
# printing each chosen action until the game reaches a terminal state.
# Each search call may use up to 60 seconds of wall-clock time.
state = NaughtsAndCrossesState()
mcts = MCTS(timeLimit=60)
while True:
    if state.isTerminal():
        break
    action = mcts.search(initialState=state)
    print(action)
    state = state.takeAction(action)

(1, 1)
(1, 0)
(0, 2)
(1, 2)
(0, 0)
(2, 0)
(2, 2)


## [mcts.ai](http://mcts.ai/code/python.html)

In [199]:
class Node:
    """A node of the UCT game tree.

    ``wins`` is accumulated from the viewpoint of ``playerJustMoved``.
    Construction fails if ``state`` is not supplied.
    """

    def __init__(self, move = None, parent = None, state = None):
        self.move = move              # move that led here; None for the root
        self.parentNode = parent      # None for the root
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        # Moves that do not have a child node yet.
        self.untriedMoves = state.GetMoves()
        # The only piece of the state the node needs later on.
        self.playerJustMoved = state.playerJustMoved

    def UCTSelectChild(self):
        """Return the child with the highest UCB1 score.

        score(c) = c.wins/c.visits + sqrt(2*log(self.visits)/c.visits)
        A constant factor on the square-root term is often used to tune
        exploration against exploitation; none is applied here.
        """
        def ucb1(child):
            exploitation = child.wins / child.visits
            exploration = math.sqrt(2 * math.log(self.visits) / child.visits)
            return exploitation + exploration

        return max(self.childNodes, key = ucb1)

    def AddChild(self, m, s):
        """Create a child for move ``m`` (reached state ``s``), drop ``m`` from
        the untried moves, and return the new child."""
        child = Node(move = m, parent = self, state = s)
        self.untriedMoves.remove(m)
        self.childNodes.append(child)
        return child

    def Update(self, result):
        """Record one more visit and add ``result`` (seen from playerJustMoved) to wins."""
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return f"[M:{self.move} | W/V: {self.wins:6.4g}/{self.visits:5d} | U:{self.untriedMoves}]"

    def TreeToString(self, indent):
        """Render this node and its whole subtree, one node per line, indented by depth."""
        pieces = ["\n" + "| " * indent + str(self)]
        for child in self.childNodes:
            pieces.append(child.TreeToString(indent + 1))
        return ''.join(pieces)

    def ChildrenToString(self):
        """Render the direct children, one per line."""
        lines = [str(child) for child in self.childNodes]
        return "\n".join(lines)
    
def UCT(rootstate, itermax, rootnode = None, verbosity = 0):
    """Run ``itermax`` UCT iterations from ``rootstate`` and return the best child node.

    rootstate: game state to search from; it is cloned for every iteration
               and never modified.
    itermax:   number of iterations to run.
    rootnode:  optional existing tree node matching ``rootstate`` (for example
               the node returned by a previous call), reused as the search
               root. A fresh root is created when omitted.
    verbosity: 2 prints the whole tree, 1 prints the root's children.

    Returns the child *node* of the root with the highest wins/visits ratio;
    the move to play is its ``.move`` attribute.
    Raises ValueError if the root has no children once the search is done
    (terminal ``rootstate``, or no iteration was run on a fresh root).

    Assumes two alternating players. Each node is updated with
    ``state.GetResult(node.playerJustMoved)`` on the finished game.
    """

    if rootnode is None:
        rootnode = Node(state = rootstate)

    for _ in range(itermax):
        node = rootnode
        state = rootstate.Clone()  # play this iteration on a private copy

        # Selection: while every move here has been tried at least once,
        # follow the UCB1 choice down the tree.
        while not node.untriedMoves and node.childNodes:
            node = node.UCTSelectChild()
            state.DoMove(node.move)
        # Now at a terminal node or at a node with untried moves.

        # Expansion: add one child for a randomly chosen untried move.
        if node.untriedMoves:
            m = random.choice(node.untriedMoves)
            state.DoMove(m)
            node = node.AddChild(m, state)

        # Rollout: play random moves to the end of the game without growing
        # the tree; this only estimates the value of the newly added node.
        moves = state.GetMoves()
        while moves:
            state.DoMove(random.choice(moves))
            moves = state.GetMoves()

        # Backpropagation: credit the finished game to every node on the path,
        # each from the viewpoint of the player who moved into it. Stop at the
        # search root: a reused rootnode may still point at ancestors from an
        # earlier search, which are not part of this one.
        while node is not None:
            node.Update(state.GetResult(node.playerJustMoved))
            if node is rootnode:
                break
            node = node.parentNode

    if verbosity==2: print(rootnode.TreeToString(0))
    elif verbosity==1: print(rootnode.ChildrenToString())

    if not rootnode.childNodes:
        raise ValueError("UCT has no candidate moves: rootstate is terminal or no iteration was run")

    # Alternative criterion: the most visited child (key = lambda c: c.visits).
    return max(rootnode.childNodes, key = lambda c: c.wins/c.visits)  # best win rate

In [205]:
class OXOState:
    """Noughts-and-crosses position together with the player who moved last.

    Cells are indexed 0..8 row by row and hold 0 (empty), 1 or 2.
    """

    # Index triples of the three rows, three columns and two diagonals.
    _WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        self.playerJustMoved = 2          # so that player 1 makes the first move
        self.board = [0,0,0,0,0,0,0,0,0]  # 0 = empty, 1 = player 1, 2 = player 2
        self.moves = list(range(9))       # cells still available

    def Clone(self):
        """Return an independent copy of this state."""
        duplicate = OXOState()
        duplicate.playerJustMoved = self.playerJustMoved
        duplicate.board = list(self.board)
        duplicate.moves = list(self.moves)
        return duplicate

    def DoMove(self, move):
        """Place the next player's mark on cell ``move`` and make them the last mover.

        Once a line has been completed the list of available moves is emptied.
        """
        assert self.board[move] == 0
        mover = 3 - self.playerJustMoved
        self.playerJustMoved = mover
        self.board[move] = mover
        self.moves.remove(move)
        if self.GetResult(mover):  # a line was completed: the game is over
            self.moves = []

    def GetMoves(self):
        """Return a copy of the list of available moves."""
        return list(self.moves)

    def GetResult(self, viewpoint):
        """Return the outcome as seen by player ``viewpoint``.

        1 if a completed line carries ``viewpoint``'s mark, -1 if it carries
        the other mark, 0 if there is no completed line and no move is left
        (draw), and None while the game is still open.
        """
        for first, second, third in self._WIN_LINES:
            owner = self.board[first]
            if owner != 0 and owner == self.board[second] and self.board[second] == self.board[third]:
                return 1 if owner == viewpoint else -1
        if not self.GetMoves():
            return 0  # draw
        return None

    def __repr__(self):  # player 1 is X, player 2 is O
        cells = ["·XO"[self.board[i]] for i in range(9)]
        rows = [''.join(cells[start:start + 3]) for start in (0, 3, 6)]
        return '\n'.join(rows) + '\n'

In [207]:
# Self-play demo: UCT chooses every move for both sides. The node returned by
# one search is handed back in as the root of the next one, so the matching
# subtree is reused.
state = OXOState()
node = None
print(state)
while True:
    if not state.GetMoves():
        break
    node = UCT(rootstate = state, rootnode = node, itermax = 1000, verbosity = False)
    state.DoMove(node.move)
    print(state)

# Announce the outcome once no moves remain.
print("Player 1 wins!" if state.GetResult(1) == 1
      else "Player 2 wins!" if state.GetResult(2) == 1
      else "Nobody wins!")

···
···
···

···
·X·
···

O··
·X·
···

O··
XX·
···

O··
XXO
···

OX·
XXO
···

OX·
XXO
·O·

OX·
XXO
XO·

OXO
XXO
XO·

OXO
XXO
XOX

Nobody wins!


## [RocAlphaGo](https://github.com/Rochester-NRT/RocAlphaGo/tree/develop/AlphaGo)