In [114]:
import time, math, random
from copy import deepcopy
from IPython.display import clear_output
import numpy as np

# Barebone UCT
https://youtu.be/ItMutbeOHtc?t=3922, https://www.youtube.com/watch?v=Bm7zah_LrmE

1. From each node (a node = a state of the board), repeatedly try an unexplored move and roll out randomly until the game is over. The win/loss count is propagated back to every node along the path taken.
2. When all moves of a node have been tried at least once, *select* the one to try next based on UCB (mean value + C\*uncertainty of the mean). Repeat this until reaching a node that has at least one untried move.
3. Repeat 1 & 2 until the desired time/iteration limit is reached, then select the best move at the root node (the current real situation of the board) based on the *mean value* of each move.

4. Some nodes may be reached in different ways, so each node keeps a {move: child_node} dictionary. When selecting a child node, return both the corresponding move and the child node.

## Code: [mcts.ai](http://mcts.ai/code/python.html) + [mcts](https://github.com/pbsinclair42/MCTS/) + reuse nodes

In [295]:
class Node:
    """ Statistics for one game state in the search tree.

        Wins is always from the viewpoint of playerJustMoved.
    """
    def __init__(self, state = None):
        """ Build a node for `state`; only state.GetMoves() and state.playerJustMoved are read. """
        self.childNodes = {} # move -> child Node (the move that gets *into* the next node)
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves() # future child nodes
        self.playerJustMoved = state.playerJustMoved # the only part of the state that the Node needs later

    def MeanValue(self):
        """ Average reward wins/visits; 0.0 for a node that has never been visited. """
        return self.wins/self.visits if self.visits else 0.0

    def UCTSelectChild(self,explore=1):
        """ Use the UCB1 formula to select a child; returns the (move, childNode) pair.

            score = c.wins/c.visits + explore * sqrt(2*log(self.visits)/c.visits)
            `explore` varies the amount of exploration versus exploitation; explore=0 ranks by mean value only.
            A child with no visits is preferred when exploring (explore > 0) and avoided otherwise.
            Raises ValueError (from max) when the node has no children.
        """
        # the log term is the same for every child, so compute it once
        logVisits = math.log(self.visits) if self.visits > 0 else 0.0

        def score(item):
            child = item[1]
            if child.visits == 0:
                # no statistics yet: the formula would divide by zero
                return math.inf if explore > 0 else -math.inf
            return child.wins/child.visits + explore*math.sqrt(2*logVisits/child.visits)

        return max(self.childNodes.items(), key = score)

    def AddChild(self, move, s, n = None):
        """ Remove move from untriedMoves and register the child reached by it.
            If an existing node n is provided it is reused, otherwise a new Node is built from state s.
            Returns the child node.
        """
        if n is None:
            n = Node(state = s)
        self.untriedMoves.remove(move)
        self.childNodes[move] = n
        return n

    def Update(self, result):
        """ update visit & win counts. (from the viewpoint of playerJustmoved) """
        self.visits += 1
        self.wins += result # game results in the range [0.0, 1.0]

    def __repr__(self):
        return f"[W/V: {self.wins:6.4g}/{self.visits:5d} | U: {self.untriedMoves}]"

    def TreeToString(self, indent):
        """ Render this node and, recursively, all of its descendants, one node per line. """
        s = "\n"+ "| "*indent + str(self)
        s += ''.join(c.TreeToString(indent+1) for c in self.childNodes.values())
        return s

    def ChildrenToString(self):
        """ One 'move|node' line per child, sorted by increasing mean value (best move last). """
        ranked = sorted(self.childNodes.items(), key=lambda e: e[1].MeanValue())
        return "\n".join(f'{m}|{c}' for m,c in ranked)
    
class MCTS():   #reuses nodes
    """ UCT search that keeps every node it creates.

        Nodes live in self.nodes, keyed by str((state, state.playerJustMoved)), so statistics
        gathered in one search() call are reused by later calls and by positions reached
        through a different move order.
    """
    _BATCH = 100  # simulations run between two clock checks in a time-limited search

    def __init__(self, explore=1, verbosity=0):
        self.nodes = {}               #store all nodes previously explored
        self.explore = explore        # exploration constant passed to Node.UCTSelectChild
        self.verbosity = verbosity    # 1: print the root's children, 2: print the whole tree

    def search(self, state, timemax=None, itermax=None):
        """ Run simulations from `state` and return the move with the best mean value.

            timemax -- time budget in seconds (checked every _BATCH simulations), or None
            itermax -- maximum number of simulations, or None
            At least one of the two must be given; when both are given the search stops at
            whichever limit is hit first.
            Raises TypeError when no budget is given and ValueError when the root ends up
            without any explored move (terminal state or empty budget).
        """
        if timemax is None and itermax is None:
            raise TypeError("search() needs a budget: pass timemax (seconds) and/or itermax (simulations)")

        rootkey = str((state,state.playerJustMoved))
        if rootkey not in self.nodes:
            self.nodes[rootkey] = Node(state = state)
        rootnode = self.nodes[rootkey]

        if timemax is None:
            for _ in range(itermax):
                self.simulate(state.Clone(),rootnode)
        else:
            # monotonic clock: the budget is unaffected by system clock adjustments
            deadline = time.monotonic() + timemax
            done = 0
            while time.monotonic() < deadline and (itermax is None or done < itermax):
                # never run past itermax, even inside a batch
                batch = self._BATCH if itermax is None else min(self._BATCH, itermax - done)
                for _ in range(batch):
                    self.simulate(state.Clone(),rootnode)
                done += batch

        if not rootnode.childNodes:
            raise ValueError("search() has no explored move to return (terminal state or empty budget)")

        # final choice is pure exploitation: explore=0 ranks children by mean value only
        moveToChild, _ = rootnode.UCTSelectChild(explore=0)
        if self.verbosity==2:   print(rootnode.TreeToString(0))
        elif self.verbosity==1: print(rootnode.ChildrenToString())

        return moveToChild

    def simulate(self,state,node):
        """ One UCT iteration: select, expand, roll out, backpropagate.

            `state` is mutated (callers pass a clone); `node` is the node matching `state`.
        """
        path = [node]
        #descend (following UCB) to the first branch not fully expanded (some moves not tried), or reaching end of game
        while node.untriedMoves == [] and node.childNodes != {}:
            move,node = node.UCTSelectChild(self.explore)
            state.DoMove(move)
            path.append(node)

        # if we can expand (i.e. node is not terminal), expand (add a childNode) and move the state into it
        if node.untriedMoves != []:
            move = random.choice(node.untriedMoves)
            state.DoMove(move)
            k = str((state,state.playerJustMoved))
            # reuse the node if this position was already reached some other way
            nextnode = self.nodes[k] if k in self.nodes else Node(state = state)

            node.AddChild(move,state,nextnode)
            self.nodes[k] = nextnode
            node = nextnode
            path.append(node)

        # Rollout to END of a game randomly (not expanding childNodes -- just want to estimate the newly added node's value)
        state.RollOut()

        # state is now terminal; backpropagate this game's result to its path's nodes' win counts
        viewpoint = node.playerJustMoved
        reward = state.GetResult(viewpoint)
        for visited in path:
            # each node scores from the viewpoint of its own playerJustMoved
            if visited.playerJustMoved == viewpoint:
                visited.Update(reward)
            else:
                visited.Update(1-reward)

## tic-tac-toe

In [287]:
#contains current state and who has last moved
class OXOState: # on a size*size board, first one to fill "inarow" symbols horizonally/vertically/diagonally wins
    """ Generalised tic-tac-toe position.

        board  -- flat list of size*size cells: 0 = empty, 1 = player 1, 2 = player 2
        moves  -- cells that can still be played; emptied as soon as somebody has won
        lines  -- tuples of cell indices that win the game when filled by one player
        playerJustMoved -- 1 or 2; starts at 2 so that player 1 moves first
    """
    def __init__(self,size=3,inarow=None,board=None,moves=None,lines=None):
        self.playerJustMoved = 2 #  (1) will have the first move
        self.size = size
        # test against None, not truthiness: an empty moves list is a real value (game over)
        # and must not be replaced by a full board of moves when a finished game is cloned
        self.board = [0]*size*size if board is None else board # 0 = empty, 1 = player 1, 2 = player 2
        self.moves = list(range(size*size)) if moves is None else moves

        # winning lines: rows, columns, diagonals, anti-diagonals of length inarow (defaults to size)
        self.lines = lines
        if self.lines is None:
            inarow = inarow or size
            self.lines = [tuple(range(i*size+j,i*size+j+inarow)) for i in range(0,size) for j in range(0,size-inarow+1)]+\
                         [tuple(range(i*size+j,(i+inarow)*size+j,size)) for i in range(0,size-inarow+1) for j in range(0,size)]+\
                         [tuple(range(i*size+j,i*size+j+inarow*(size+1),size+1)) for i in range(size-inarow+1) for j in range(size-inarow+1)]+\
                         [tuple(range(i*size+j,i*size+j+inarow*(size-1),size-1)) for i in range(size-inarow+1) for j in range(inarow-1,size)]
#             if size==4: #https://play.google.com/store/apps/details?id=techmasterplus.tictactoe4x4&hl=en_US&rdid=techmasterplus.tictactoe4x4
#                 self.lines+=[(0,1,4,5),(1,2,5,6),(2,3,6,7),(4,5,8,9),(5,6,9,10),(6,7,10,11),(8,9,12,13),(9,10,13,14),(10,11,14,15)]

    def Clone(self):
        """ Create a clone of this game state; board and moves are copied, lines are shared. """
        st = OXOState(self.size,None,self.board[:],self.moves[:],self.lines)
        st.playerJustMoved = self.playerJustMoved
        return st

    def DoMove(self, move):
        """ Update a state by carrying out the given move. Must also update playerJustMoved.  """
        assert move in self.moves#self.board[move] == 0
        self.playerJustMoved = 3 - self.playerJustMoved
        self.board[move] = self.playerJustMoved
        self.moves.remove(move)
        if self.GetResult(self.playerJustMoved) != 0.5:  # 0 or 1 means somebody has won: no further moves
            self.moves=[]

    def GetMoves(self):  # legal moves for next player. empty if game is over
        return self.moves[:]

    def RollOut(self):
        """ Play uniformly random moves until the game is over.
            Picks straight from self.moves instead of calling GetMoves() to avoid copying the list each turn.
        """
        while self.moves:
            self.DoMove(random.choice(self.moves))

    def GetResult(self, viewpoint):  # reward from `viewpoint`, in the range [0.0, 1.0]
        """ 1 if `viewpoint` owns a complete line, 0 if the other player does, 0.5 otherwise. """
        for l in self.lines:
            owner = self.board[l[0]]
            if owner != 0 and all(self.board[p] == owner for p in l):
                return 1 if owner == viewpoint else 0
        return 0.5

    def __repr__(self): # string representation: 1 (X), 2 = (O)
        rows = []
        for left in range(0,self.size*self.size,self.size):
            rows.append(''.join("·XO"[cell] for cell in self.board[left:left+self.size]))
        return ''.join(row+'\n' for row in rows)

In [None]:
# Other board variants to try instead of the 3x3 game below:
#   OXOState(5,4)  https://www.coolmathgames.com/0-tic-tac-toe
#   OXOState(7,4)  https://www.math10.com/en/math-games/tic-tac-toe/tic-tac-toe.html, https://m.twoplayergames.org/play/tic-tac-toe-3-5-7.html
state = OXOState(3)
node = None  # not used in this cell; kept in case another cell reads it
mcts = MCTS(verbosity=1)

# The human is asked for a move whenever state.playerJustMoved equals this value.
# playerJustMoved is only ever 1 or 2, so 10 means the AI plays both sides;
# set it to 2 to play X (player 1) yourself, or to 1 to play O (player 2).
HUMAN_MOVES_AFTER = 10

while state.GetMoves():
    if state.playerJustMoved==HUMAN_MOVES_AFTER:
        clear_output()
        # show cell numbers on empty squares; shape follows the actual board size
        board=np.array([f'{("·XO"[s] if s else str(pos)):2s}' for pos,s in enumerate(state.board)]).reshape(state.size,state.size)
        m = int(input(board))
    else:
        m = mcts.search(state,timemax=5)
    state.DoMove(m)
    print(state)
if state.GetResult(1) == 1:   print("Player 1 wins!")
elif state.GetResult(2) == 1: print("Player 2 wins!")
else: print("Nobody wins!")

## Othello

In [297]:
class OthelloState:
    """ Othello position on a size*size board.

        board -- flat list, cell (x,y) is board[x*size+y]: 0 = empty, 1 = player 1, 2 = player 2
        playerJustMoved -- 1 or 2; starts at 2 so that player 1 moves first
        A move is a flat cell index, or None for a pass.
    """
    # the eight (dx,dy) steps to neighbouring cells
    _DIRECTIONS = ((0,+1),(+1,+1),(+1,0),(+1,-1),(0,-1),(-1,-1),(-1,0),(-1,+1))

    def __init__(self, size = 8, board=None):  # size must be integral and even
        self.playerJustMoved = 2
        self.size = size
        self.board = board
        if self.board is None:
            # build the starting position as a grid, then flatten it
            grid = [[0]*size for _ in range(self.size)] # 0 = empty, 1 = player 1, 2 = player 2
            grid[size//2][size//2] = grid[size//2-1][size//2-1] = 2
            grid[size//2][size//2-1] = grid[size//2-1][size//2] = 1
            self.board = [e for row in grid for e in row]

    def Clone(self):
        """ Copy of this state with its own board list. """
        st = OthelloState(size=self.size, board=self.board[:])
        st.playerJustMoved = self.playerJustMoved
        return st

    def DoMove(self, move):
        """ Place a counter at `move` and flip everything it sandwiches; None is a pass.
            Either way the turn goes to the other player.
        """
        if move is not None:
            (x,y)=divmod(move,self.size)
            assert self.IsOnBoard(x,y) and self.board[x*self.size+y] == 0
            # must be computed before playerJustMoved flips: the helpers treat playerJustMoved as the enemy
            flipped = self.GetAllSandwichedCounters(x,y)
            self.playerJustMoved = 3 - self.playerJustMoved
            self.board[x*self.size+y] = self.playerJustMoved
            for (a,b) in flipped:
                self.board[a*self.size+b] = self.playerJustMoved
        else:
            self.playerJustMoved = 3 - self.playerJustMoved

    def GetMoves(self):
        """ Legal moves for the player to move: a list of cell indices, [None] when the player
            must pass but the opponent can still play, or [] when the game is over.
        """
        emptypos = [pos for pos,e in enumerate(self.board) if e==0]
        if not emptypos:
            return []
        viable = [pos for pos in emptypos if self.ExistsSandwiched(*divmod(pos,self.size))]
        if viable:
            return viable
        #need to check if opponent also has no viable pos: look at the board from the other side, then switch back
        self.playerJustMoved = 3 - self.playerJustMoved
        oppoViable = [pos for pos in emptypos if self.ExistsSandwiched(*divmod(pos,self.size))]
        self.playerJustMoved = 3 - self.playerJustMoved
        return [None] if oppoViable else []

    def RollOut(self):
        """ Play random legal moves (passing when necessary) until the game is over.
            The game ends when the board is full or both players had to pass in a row.
        """
        emptypos = [pos for pos,e in enumerate(self.board) if e==0]
        passes = 0 # consecutive passes; two in a row means neither side can play
        while emptypos and passes < 2:
            random.shuffle(emptypos)
            for pos in emptypos:
                if self.ExistsSandwiched(*divmod(pos,self.size)):
                    self.DoMove(pos)
                    emptypos = [i for i,e in enumerate(self.board) if e==0]
                    passes = 0 # a real move was played, so earlier passes no longer count
                    break
            else:
                passes += 1
                self.DoMove(None)

    def AdjacentEnemyDirections(self,x,y):# Speeds up GetMoves by only considering squares which are adjacent to an enemy-occupied square.
        """ Directions (dx,dy) in which the neighbour of (x,y) holds a counter of playerJustMoved,
            i.e. of the opponent of the player about to move.
        """
        return [(dx,dy) for (dx,dy) in self._DIRECTIONS
                        if self.IsOnBoard(x+dx,y+dy) and self.board[(x+dx)*self.size+y+dy] == self.playerJustMoved]

    def ExistsSandwiched(self,x,y):# Is there at least one counter which would be flipped if my counter was placed at (x,y)?
        for (dx,dy) in self.AdjacentEnemyDirections(x,y):
            x1=x+dx
            y1=y+dy
            # walk over the run of enemy counters ...
            while self.IsOnBoard(x1,y1) and self.board[x1*self.size+y1] == self.playerJustMoved:
                x1 += dx
                y1 += dy
            # ... which only counts if it is closed off by one of my own counters
            if self.IsOnBoard(x1,y1) and self.board[x1*self.size+y1] == 3 - self.playerJustMoved:
                return True
        return False

    def GetAllSandwichedCounters(self, x, y):# All (x,y) coordinates of opponent counters that would be flipped by placing my counter at (x,y)
        sandwiched = []
        for (dx,dy) in self.AdjacentEnemyDirections(x,y):
            sandwiched.extend(self.SandwichedCounters(x,y,dx,dy))
        return sandwiched

    def SandwichedCounters(self, x, y, dx, dy):# Return the coordinates of all opponent counters sandwiched between (x,y) and my counter.
        x += dx
        y += dy
        sandwiched = []
        while self.IsOnBoard(x,y) and self.board[x*self.size+y] == self.playerJustMoved:
            sandwiched.append((x,y))
            x += dx
            y += dy
        if self.IsOnBoard(x,y) and self.board[x*self.size+y] == 3 - self.playerJustMoved:
            return sandwiched
        else:
            return [] # nothing sandwiched

    def IsOnBoard(self, x, y):
        return 0 <= x < self.size and 0 <= y < self.size

    def GetResult(self, viewpoint): #after gameover
        """ 1.0 if `viewpoint` has more counters than the opponent, 0.0 if fewer, 0.5 on a tie. """
        viewpointscore = self.board.count(viewpoint)
        oppositescore = self.board.count(3-viewpoint)
        if viewpointscore > oppositescore: return 1.0
        elif oppositescore > viewpointscore: return 0.0
        else: return 0.5 # draw

    def __repr__(self):
        s= ""
        for x in range(self.size):
            s += "".join("·XO"[e] for e in self.board[x*self.size:(x+1)*self.size])
            s += "\n"
        return s

In [None]:
state = OthelloState(8)
mcts = MCTS(verbosity=1)
while True:
    moves = state.GetMoves()
    if not moves:   # game over
        break
    if state.playerJustMoved==1:   # player 2 (O) is the human
        # show cell numbers on empty squares; shape follows the actual board size
        board=np.array([f'{("·XO"[e] if e else str(p)):2s}' for p,e in enumerate(state.board)]).reshape(state.size,state.size)
        print(board)
        key = str((state,state.playerJustMoved))
        if key in mcts.nodes:
            # statistics the AI has already gathered for this position
            n=mcts.nodes[key]
            print(n.ChildrenToString())
            print('choose from:',end='')
        if moves == [None]:
            # the human has no legal move: pass automatically instead of asking for a number
            print('no legal move, passing')
            m = None
        else:
            # keep asking until a legal cell number is entered
            while True:
                try:
                    m = int(input(moves))
                except ValueError:
                    continue
                if m in moves:
                    break
    else:
        m = mcts.search(state,timemax=30)
    state.DoMove(m)
    print(state)
if state.GetResult(1) == 1:   print("Player 1 wins!")
elif state.GetResult(2) == 1: print("Player 2 wins!")
else: print("Nobody wins!")

37|[W/V:  949.5/ 2017 | U: []]
19|[W/V:   1356/ 2801 | U: []]
44|[W/V:   1410/ 2904 | U: []]
26|[W/V:   1554/ 3178 | U: []]
········
········
········
··XXX···
···XO···
········
········
········

[['0 ' '1 ' '2 ' '3 ' '4 ' '5 ' '6 ' '7 ']
 ['8 ' '9 ' '10' '11' '12' '13' '14' '15']
 ['16' '17' '18' '19' '20' '21' '22' '23']
 ['24' '25' 'X ' 'X ' 'X ' '29' '30' '31']
 ['32' '33' '34' 'X ' 'O ' '37' '38' '39']
 ['40' '41' '42' '43' '44' '45' '46' '47']
 ['48' '49' '50' '51' '52' '53' '54' '55']
 ['56' '57' '58' '59' '60' '61' '62' '63']]
18|[W/V:    460/  912 | U: []]
34|[W/V:  561.5/ 1098 | U: []]
20|[W/V:  601.5/ 1167 | U: []]
choose from:[18, 20, 34]20
········
········
····O···
··XXO···
···XO···
········
········
········

37|[W/V:    743/ 1622 | U: []]
21|[W/V:   1168/ 2444 | U: []]
29|[W/V:   1233/ 2567 | U: []]
13|[W/V:   1328/ 2747 | U: []]
45|[W/V:   1560/ 3186 | U: []]
········
········
····O···
··XXO···
···XX···
·····X··
········
········

[['0 ' '1 ' '2 ' '3 ' '4 ' '5 ' '6 ' 

## [RocAlphaGo](https://github.com/Rochester-NRT/RocAlphaGo/tree/develop/AlphaGo)