In [1]:
# implementation of MCTS
# based on https://youtu.be/UXW2yZndl7U?si=Uer-RZXKU8ozQBSR
# and https://en.wikipedia.org/wiki/Monte_Carlo_tree_search
#
# implementation is slightly different from referenced video
# video shows an algorithm flowchart, but breaks the flowchart for the root node
# mine treats the root node as per the flowchart, which I think is 'nice' because it's
# more 'correct', but it's actually bad because it's unnecessary, but anything else
# would just make it more complex, so whatever. it's extremely minor (I think).

In [2]:
import numpy as np
import random
import math
import sys

In [3]:
# remove index from list in O(1) without preserving order
def fast_remove(list, index):
    if index < len(list):
        list[-1], list[index] = list[index], list[-1]
        list.pop()
    return list

In [4]:
class game():
    # expects initial_state is a np.array (otherwise copy semantics break)
    def __init__(self, initial_state=None, playing=1):
        self.state = initial_state if initial_state is not None else np.zeros((3, 3), dtype=int)
        self.playing = playing
        self.winner = None
        self._open_indices = self._get_open_indices()

    @property
    def opponent(self):
        return self.playing ^ 3

        
    def __repr__(self):
        return f'\n{str(self.state)}\n' + (f'current player: {self.playing}' if self.playing is not None else (f'winner: {self.winner}' if self.winner is not None else 'draw'))

    def play(self, pos):
        x, y = pos
        fast_remove(self._open_indices, self._open_indices.index(pos))
        
        if  self.playing is None:
            return None
        elif self.state[y][x] != 0:
            return False
            
        self.state[y][x] = self.playing

        self._update_winner(x, y) # update winner

        if self.playing is not None:
            self.playing = self.playing ^ 3 # 1 -> 2, 2 -> 1
        
        return self


    # beware: this function only works if the move to x, y actually has same
    #         value as self.player. It won't work if there are 3 in a row,
    #         but they're of the wrong value.
    def _update_winner(self, x, y):
        def set_winner():
            self.winner = self.playing
            self.playing = None
            self._open_indices = [] #useful for MCTS code, that way we don't actually need to check whether or not a given index is a winner, we just check if it has children, which we're already doing.
                                      #although this is somewhat of a hack and may not be the best solution in all circumstances
            
        if self.winner:
            return

        r = self.playing
        for i in range(3):    
            r &= self.state[i][x]
        if r != 0:
            set_winner()
            return
        
        c = self.playing    
        for i in range(3):    
            c &= self.state[y][i]
        if c != 0:
            set_winner()
            return
        
        if (self.playing ==
            self.state[0][0] ==
            self.state[1][1] ==
            self.state[2][2]):
            set_winner()
            return
            
        if (self.playing ==        
            self.state[0][2] ==            
            self.state[1][1] ==
            self.state[2][0]):    
            set_winner()
            return

        if len(self._open_indices) <= 0:
            self.winner = None
            self.playing = None

    def random_finish(self):
        while (self.winner == None) and self._open_indices:
            move = random.randint(0, len(self._open_indices)-1) # int in [0, len(self._open_indices)-1]
            self.play(self._open_indices[move])
            #fast_remove(self._open_indices, move)
    
    def _get_open_indices(self):
        return [
            (x, y)
            for x in range(3)
            for y in range(3)
            if self.state[y][x] == 0
        ]

    def copy(self):
        tmp = game(initial_state=self.state.copy())
        tmp.playing = self.playing
        return tmp

In [5]:
# _update_winner tests

def as_eq(check, expect):
    assert check == expect, f'expected {expect}, got {check}'

g = game(initial_state=np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1]
]))
g._update_winner(0, 0)
as_eq(g.winner, 1)

g = game(initial_state=np.array([
    [1, 0, 0],
    [1, 0, 0],
    [1, 0, 0]
]))
g._update_winner(0, 0)
as_eq(g.winner, 1)

g = game(initial_state=np.array([
    [0, 1, 0],
    [0, 1, 0],
    [0, 1, 0]
]))
g._update_winner(1, 2)
as_eq(g.winner, 1)


g = game(initial_state=np.array([
    [0, 2, 0],
    [0, 2, 0],
    [0, 2, 0]
]))
g.playing = 2
g._update_winner(1, 0)
as_eq(g.winner, 2)

g = game(initial_state=np.array([
    [1, 2, 1],
    [1, 2, 1],
    [1, 2, 1]
]))
g.playing = 2
g._update_winner(1, 2)
as_eq(g.winner, 2)

# fast_remove tests
as_eq(fast_remove([], 0), [])
as_eq(fast_remove([1], 0), [])
as_eq(fast_remove([1, 2], 0), [2])
as_eq(fast_remove([1, 2], 1), [1])
as_eq(fast_remove([1, 2], 2), [1, 2])
as_eq(fast_remove([1, 2, 3], 0), [3, 2])
as_eq(fast_remove([1, 2, 3], 1), [1, 3])

In [6]:
g = game()
g.play((0, 0))
g.play((1, 1))

g1 = g.copy()
g1.random_finish()

g, g1

(
 [[1 0 0]
  [0 2 0]
  [0 0 0]]
 current player: 1,
 
 [[1 1 2]
  [0 2 1]
  [2 0 0]]
 winner: 2)

In [7]:
#class MCTSnode:
#    def __init__(self):
#        self.total = 0
#        self.visits = 0
#        self.C = 2
#
#    def UCB1(self, parent_visits):
#        return (total / visits) + C * math.sqrt(math.log(parent_visits) / self.visits)
#
## node for a tic tac toe monte carlo search tree
#class TTTnode(MCTSnode):
#    def __init__(self, game):
#        super().__init__()
#        self.game = game
#
#    def iterate(n=1):
#        for _ in range(1):           

In [8]:
br = '\n'
comma = ','

In [71]:
# node for a tic tac toe monte carlo search tree
# maybe I should abstract this later to an API class for arbitrary games...

class MCTSnode():
    def __init__(self, game, player=1):
        self.total = 0
        self.visits = 0
        self.C = 2 # constant for UCB1
        self.game = game
        self.player = player
        self.children = None

    def __repr__(self):
        return f'\n{str(self.game.state).replace(br, comma)} {self.total=}, {self.visits=} {self.children=}'
    
    def UCB1(self, parent_visits):
        if self.visits == 0:
            return sys.float_info.max
        else:
            return (self.total / self.visits) + self.C * math.sqrt(math.log(parent_visits) / self.visits)

    def simulate(self):
        simulation = self.game.copy()        
        #print(self.game)
        if simulation.winner == None:
            simulation.random_finish()        
        #print(self.game)
        if simulation.winner == self.player:        
            return 1
        elif simulation.winner == None:        
            return 0
        else:        
            return -1

    def rollout(self):
        score = self.simulate()
        self.total += score
        self.visits += 1
        return score

    @property
    def opponent(self):
        return self.player ^ 3
    
    def add_children(self):
        assert self.game._open_indices
        assert self.game.winner == None

        # print(f'{(self.player == self.game.playing)=}')
        
        if self.player != self.game.playing:
            # self.children = [                
            #     MCTSnode(self.game.copy().play(move), player=self.player).add_children() for move in self.game._open_indices                    
            # ]
            opponent_moves = [
                MCTSnode(self.game.copy().play(move), player=self.player) for move in self.game._open_indices
            ]
            # for opponent_move in opponent_moves:
            #     print(f'{opponent_move=}')
            #     print(f'{opponent_move.game._open_indices=}')
            self.children = [                
                MCTSnode(opponent_move.game.copy().play(move), player=self.player) for opponent_move in opponent_moves for move in opponent_move.game._open_indices
            ]
            # for child in self.children:
            #     print(child.game)
            #print(self.game._open_indices)
            
        else:
            self.children = [                
                MCTSnode(self.game.copy().play(move), player=self.player) for move in self.game._open_indices                    
            ]

        # print(f'{self.children=}')


        #return self.children
    
        # print('game and children info:')
        # print(self.game)
        # print(f'{self.children=}')
        # print(f'{len(self.children)=}')
            
            
    def iterate_best_child(self):
        #print(self.children)
        UCBs = [ (child.UCB1(self.visits), i) for (i, child) in list(enumerate(self.children)) ]
        (max_ucb, i) = max(UCBs)
        #print(max_ucb)
        score = self.children[i].iteration()
        self.total += score
        self.visits += 1
        return score
        
    def iteration(self):
        # print('iteration:')
        # print(f'{self.game.state=}')
        # print(f'{len(self.game._open_indices)=}')
        # print(f'{self.game._open_indices=}')

        if self.game.playing is None:
            score = 1 if self.game.winner == self.player else -1
            self.total += score
            self.visits += 1
            return score
        
        # if self.children is None and (self.visits == 0 or self.game._open_indices == []): # rollout leaf (including terminal nodes)
        #     #print(0)
        #     return self.rollout()
        # elif self.children is None and self.game._open_indices:
        #     #print(1)
        #     self.add_children()
        #     return self.iteration() # iterate_best_child() won't work if there is only one move left and it is P2's move I think
        #                           # because if we have one move, self.game._open_indices is true, but add_children is going to
        #                           # add no children
        # elif self.children:
        #     #print(2)
        #     return self.iterate_best_child()
        # else: # no children, no open indices
        #     print(self)
        #     assert False, "iteration bug"

        if self.visits == 0 or self.children == []: # unvisited OR guaranteed losing node -- no children
            return self.rollout()
        if self.children is None:
            self.add_children()
            return self.iteration() # can't call iterate_best_child() directly since we don't know if add_children added anything
        else: # have children
            return self.iterate_best_child()

    def iterate(self, n=1):
        for i in range(n):
            #print(i, self)
            self.iteration()


    def best_move(self):
        if self.children == None:
            return None

        best_child = self.children[0]
        best_UCB1 = best_child.UCB1(self.visits)
        for child in self.children[1:]:
            child_score = child.UCB1(self.visits)
            if child_score > best_UCB1:
                best_child = child
                best_UCB1 = child_score
    
        for i in range(3):
            for j in range(3):
                if self.game.state[i][j] == 0 and best_child.game.state[i][j] == self.player:
                    return (j, i) # if we store move this could be self.move

        print("ERROR")
        print(self.game)
        print(self.game._open_indices)
        assert False, "Didn't find any move in best_move"


    def print_direct_children(self):
        for child in self.children:
            print((child.game.state, child.total, child.visits))

#print('PARENT GAME:')
            #print(self.game)
            #for child in self.children:
                #print('CHILD GAME:')
                #print(child.game)



# the specs of MCTS I've read say you should add all children when you encounter a node that has no children but that's pretty stupid
# because you have to 

In [72]:
m = MCTSnode(game(np.array([[1, 2, 0], [2, 2, 1], [0, 1, 1]]), playing=2), player=2)
m.iterate(10000)
print(m)
m.best_move()


[[1 2 0], [2 2 1], [0 1 1]] self.total=-10000, self.visits=10000 self.children=[
[[1 2 0], [2 2 1], [2 1 1]] self.total=-4999, self.visits=4999 self.children=[], 
[[1 2 2], [2 2 1], [0 1 1]] self.total=-5000, self.visits=5000 self.children=[]]


(0, 2)

In [77]:
m = MCTSnode(game(np.array([[2, 2, 1], [1, 1, 2], [2, 1, 0]]), playing=1), player=1)
m.iterate(50)
print(m)
m.best_move()


[[2 2 1], [1 1 2], [2 1 0]] self.total=-49, self.visits=50 self.children=[
[[2 2 1], [1 1 2], [2 1 1]] self.total=-49, self.visits=49 self.children=None]


(2, 2)

In [113]:
m = MCTSnode(game(np.array([
    [0, 0, 2],
    [0, 1, 0], 
    [1, 0, 2]])))
m.iterate(10000)
m.print_direct_children()
m.best_move()

# the "best move" here is stupid and seems wrong.
# (2, 1) looks easily best to me, but MCTS has a hard time finding it.
# -- at first I thought it's because MCTS is just trying to find a winning move, and we can win anywhere,
#    so MCTS is just trying to set up a win and doesn't see to block p2 from winning next turn,
#    but actually, playing (2, 1) blocks p2 AND sets up a potential win (at (0, 1)) so idk...

# what happens when I change C?

(array([[1, 0, 2],
       [0, 1, 0],
       [1, 0, 2]]), 1098, 1185)
(array([[0, 0, 2],
       [1, 1, 0],
       [1, 0, 2]]), 2791, 2823)
(array([[0, 1, 2],
       [0, 1, 0],
       [1, 0, 2]]), 2359, 2409)
(array([[0, 0, 2],
       [0, 1, 0],
       [1, 1, 2]]), 1636, 1711)
(array([[0, 0, 2],
       [0, 1, 1],
       [1, 0, 2]]), 1801, 1871)


(2, 1)

In [101]:
m = MCTSnode(game(np.array([[2, 0, 1], [0, 1, 0], [0, 0, 2]])))
m.iterate(10000)
m.print_direct_children()

(array([[2, 0, 1],
       [1, 1, 0],
       [0, 0, 2]]), 2066, 2070)
(array([[2, 0, 1],
       [0, 1, 0],
       [1, 0, 2]]), 2131, 2131)
(array([[2, 1, 1],
       [0, 1, 0],
       [0, 0, 2]]), 1864, 1880)
(array([[2, 0, 1],
       [0, 1, 0],
       [0, 1, 2]]), 2066, 2070)
(array([[2, 0, 1],
       [0, 1, 1],
       [0, 0, 2]]), 1830, 1848)


In [102]:
m = MCTSnode(game(np.array([[1, 0, 1], [2, 0, 2], [0, 0, 0]])))
m.iterate(1000)
#print(m)
m.print_direct_children()

(array([[1, 0, 1],
       [2, 0, 2],
       [1, 0, 0]]), 175, 181)
(array([[1, 1, 1],
       [2, 0, 2],
       [0, 0, 0]]), 216, 216)
(array([[1, 0, 1],
       [2, 1, 2],
       [0, 0, 0]]), 216, 216)
(array([[1, 0, 1],
       [2, 0, 2],
       [0, 1, 0]]), 203, 205)
(array([[1, 0, 1],
       [2, 0, 2],
       [0, 0, 1]]), 175, 181)


In [95]:
m = MCTSnode(game(np.array([[0, 0, 2], [0, 1, 0], [1, 0, 2]])))
m.iterate(1000000)
print(m)
m.best_move()
m.print_direct_children()


[[0 0 2], [0 1 0], [1 0 2]] self.total=999394, self.visits=1000000 self.children=[
[[1 0 2], [0 1 0], [1 0 2]] self.total=193352, self.visits=193523 self.children=[
[[1 0 2], [2 1 1], [1 0 2]] self.total=-12, self.visits=13 self.children=[
[[1 2 2], [2 1 1], [1 1 2]] self.total=-6, self.visits=6 self.children=None, 
[[1 1 2], [2 1 1], [1 2 2]] self.total=-6, self.visits=6 self.children=None], 
[[1 1 2], [2 1 0], [1 0 2]] self.total=-12, self.visits=13 self.children=[
[[1 1 2], [2 1 1], [1 2 2]] self.total=-12, self.visits=12 self.children=None], 
[[1 0 2], [2 1 0], [1 1 2]] self.total=-12, self.visits=13 self.children=[
[[1 2 2], [2 1 1], [1 1 2]] self.total=-12, self.visits=12 self.children=None], 
[[1 2 2], [1 1 0], [1 0 2]] self.total=49084, self.visits=49084 self.children=None, 
[[1 2 2], [0 1 1], [1 0 2]] self.total=47590, self.visits=47613 self.children=[
[[1 2 2], [2 1 1], [1 1 2]] self.total=-11, self.visits=11 self.children=None, 
[[1 2 2], [1 1 1], [1 2 2]] self.total=47601,

In [66]:
m = MCTSnode(game(np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]]), playing=2), player=2)
m.iterate(50)
print(m)


[[1 0 0], [0 0 0], [0 0 0]] self.total=-10, self.visits=50 self.children=[
[[1 0 0], [2 0 0], [0 0 0]] self.total=-3, self.visits=3 self.children=[
[[1 0 0], [2 0 0], [2 0 1]] self.total=0, self.visits=0 self.children=None, 
[[1 2 0], [2 0 0], [0 0 1]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [2 2 0], [0 0 1]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [2 0 0], [0 2 1]] self.total=0, self.visits=0 self.children=None, 
[[1 0 2], [2 0 0], [0 0 1]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [2 0 2], [0 0 1]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [2 0 0], [1 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 2 0], [2 0 0], [1 0 0]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [2 2 0], [1 0 0]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [2 0 0], [1 2 0]] self.total=0, self.visits=0 self.children=None, 
[[1 0 2], [2 0 0], [1 0 0]] self.total=0, self.visits=0 self.children=None, 
[[1 

In [97]:
m = MCTSnode(game(np.array([[2, 0, 2],
 [1, 0, 0],
 [1, 0, 0]]), playing=1), player=1)
m.iterate(1000)
#print(m)
m.print_direct_children()

(array([[2, 1, 2],
       [1, 0, 0],
       [1, 0, 0]]), -2, 19)
(array([[2, 0, 2],
       [1, 1, 0],
       [1, 0, 0]]), 391, 446)
(array([[2, 0, 2],
       [1, 0, 0],
       [1, 1, 0]]), -6, 8)
(array([[2, 0, 2],
       [1, 0, 1],
       [1, 0, 0]]), 389, 446)
(array([[2, 0, 2],
       [1, 0, 0],
       [1, 0, 1]]), 43, 80)


In [55]:
m = MCTSnode(game())
m.iterate(10000)
print(m)


[[0 0 0], [0 0 0], [0 0 0]] self.total=5934, self.visits=10000 self.children=[
[[1 0 0], [0 0 0], [0 0 0]] self.total=202, self.visits=475 self.children=[
[[1 0 0], [1 0 0], [0 0 2]] self.total=-2, self.visits=4 self.children=[
[[1 0 0], [1 0 2], [1 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 1 0], [1 0 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 1 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 0 2], [0 1 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 1], [1 0 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 0 1], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 1 0], [1 0 0], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 1 0], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 0 0], [2 1 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 1], [1 0 0], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[

In [56]:
m.iterate(20000)
print(m)


[[0 0 0], [0 0 0], [0 0 0]] self.total=23065, self.visits=30000 self.children=[
[[1 0 0], [0 0 0], [0 0 0]] self.total=202, self.visits=475 self.children=[
[[1 0 0], [1 0 0], [0 0 2]] self.total=-2, self.visits=4 self.children=[
[[1 0 0], [1 0 2], [1 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 1 0], [1 0 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 1 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 0 2], [0 1 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 1], [1 0 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 0 1], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 1 0], [1 0 0], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 1 0], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 0 0], [2 1 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 1], [1 0 0], [2 0 2]] self.total=0, self.visits=0 self.children=None, 


In [12]:
print_direct_children(m)

NameError: name 'print_direct_children' is not defined

In [13]:
best_move(m)

NameError: name 'best_move' is not defined

In [14]:
for child in m.children:
    print(child.game)
    print_direct_children(child)
    print(best_move(child))


[[1 0 0]
 [0 0 0]
 [0 0 0]]
current player: 2


NameError: name 'print_direct_children' is not defined

In [15]:
g = MCTSnode(game())
g.iterate(1000)
#print(g)
g.print_direct_children()
print(g.best_move())

(array([[1, 0, 0],
       [0, 0, 0],
       [0, 0, 0]]), 117, 225)
(array([[0, 0, 0],
       [1, 0, 0],
       [0, 0, 0]]), 19, 74)
(array([[0, 0, 0],
       [0, 0, 0],
       [1, 0, 0]]), 5, 47)
(array([[0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]]), 7, 51)
(array([[0, 0, 0],
       [0, 1, 0],
       [0, 0, 0]]), 182, 314)
(array([[0, 0, 0],
       [0, 0, 0],
       [0, 1, 0]]), 20, 77)
(array([[0, 0, 1],
       [0, 0, 0],
       [0, 0, 0]]), 20, 76)
(array([[0, 0, 0],
       [0, 0, 1],
       [0, 0, 0]]), 3, 43)
(array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 1]]), 29, 92)
(1, 1)


In [16]:
def diff_index(a, b, size=(3, 3)):
    x, y = size
    for i in range(y):
        for j in range(x):
            if a[i][j] != b[i][j]:
                return (j, i)
    return None

In [17]:
diff_index(np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]]), np.array([[0, 0, 0], [0, 0, 1], [0, 0, 0]]))

(2, 1)

In [18]:
def best_move(node):
    if node.children == None:
        return None

    best_child = node.children[0]
    best_UCB1 = best_child.UCB1(node.visits)
    for child in node.children[1:]:
        child_score = child.UCB1(node.visits)
        if child_score > best_UCB1:
            best_child = child
            best_UCB1 = child_score

    # return diff_index(node.game.state, best_child.game.state)

    # when we store move this could be node.move, or when we put this inside MCTSnode, self.move

    for i in range(2):
        for j in range(2):
            if node.game.state[i][j] == 0 and node.game.state[i][j] == node.player:
                return (j, i)

    assert False, "Didn't find any move in best_move"


In [19]:
m.iterate(20000)
best_move(m)

AssertionError: Didn't find any move in best_move

In [None]:
m2 = MCTSnode(game())
print(m2)
m2.iterate(2)
print(m2)
#print(m2.game._open_indices)

In [28]:
m2 = MCTSnode(game(np.array([[1, 2, 1], [2, 1, 2], [0, 0, 0]])))
#print(m2)
m2.iterate(1000)
#print(list(child.game for child in m2.children))
#print(m2)
#print(m2.game._open_indices)
#print(m2.children[1].game)
print(m2)


[[1 2 1], [2 1 2], [0 0 0]] self.total=1000, self.visits=1000 self.children=[
[[1 2 1], [2 1 2], [1 0 0]] self.total=333, self.visits=333 self.children=None, 
[[1 2 1], [2 1 2], [0 1 0]] self.total=333, self.visits=333 self.children=[
[[1 2 1], [2 1 2], [2 1 1]] self.total=166, self.visits=166 self.children=None, 
[[1 2 1], [2 1 2], [1 1 2]] self.total=166, self.visits=166 self.children=None], 
[[1 2 1], [2 1 2], [0 0 1]] self.total=333, self.visits=333 self.children=None]


In [29]:
m3 = MCTSnode(game(np.array([[1, 2, 0], [0, 1, 1], [0, 0, 2]]), playing=2))
m3.iterate(10000)
#print(m3.game.state)
#print_direct_children(m3)
print(m3)


[[1 2 0], [0 1 1], [0 0 2]] self.total=9902, self.visits=10000 self.children=[
[[1 2 1], [2 1 1], [0 0 2]] self.total=911, self.visits=924 self.children=[
[[1 2 1], [2 1 1], [2 1 2]] self.total=-6, self.visits=6 self.children=None, 
[[1 2 1], [2 1 1], [1 2 2]] self.total=917, self.visits=917 self.children=None], 
[[1 2 0], [2 1 1], [1 0 2]] self.total=924, self.visits=936 self.children=[
[[1 2 2], [2 1 1], [1 1 2]] self.total=-6, self.visits=6 self.children=None, 
[[1 2 1], [2 1 1], [1 2 2]] self.total=929, self.visits=929 self.children=None], 
[[1 2 0], [2 1 1], [0 1 2]] self.total=-8, self.visits=9 self.children=[
[[1 2 1], [2 1 1], [2 1 2]] self.total=-4, self.visits=4 self.children=None, 
[[1 2 2], [2 1 1], [1 1 2]] self.total=-4, self.visits=4 self.children=None], 
[[1 2 0], [1 1 1], [2 0 2]] self.total=1070, self.visits=1070 self.children=None, 
[[1 2 1], [0 1 1], [2 0 2]] self.total=-8, self.visits=8 self.children=[
[[1 2 1], [2 1 1], [2 1 2]] self.total=-7, self.visits=7 self.

In [30]:
children_with_move(m3, (2, 0))

[
 [[1 2 2], [1 1 1], [0 0 2]] self.total=1070, self.visits=1070 self.children=None,
 
 [[1 2 2], [0 1 1], [1 0 2]] self.total=911, self.visits=924 self.children=[
 [[1 2 2], [2 1 1], [1 1 2]] self.total=-6, self.visits=6 self.children=None, 
 [[1 2 2], [1 1 1], [1 2 2]] self.total=917, self.visits=917 self.children=None],
 
 [[1 2 2], [0 1 1], [0 1 2]] self.total=911, self.visits=924 self.children=[
 [[1 2 2], [2 1 1], [1 1 2]] self.total=-6, self.visits=6 self.children=None, 
 [[1 2 2], [1 1 1], [2 1 2]] self.total=917, self.visits=917 self.children=None]]

In [31]:
def children_with_move(node, move, player=2):
    x, y = move
    return [ child for child in node.children if child.game.state[y][x] == player ]

In [32]:
g = game()

In [33]:
g.play((1, 0))
g


[[0 1 0]
 [0 0 0]
 [0 0 0]]
current player: 2

In [34]:
g = game().play((0, 0))
g


[[1 0 0]
 [0 0 0]
 [0 0 0]]
current player: 2

In [35]:
h = g.copy()
h


[[1 0 0]
 [0 0 0]
 [0 0 0]]
current player: 2

In [36]:
list(enumerate(None))

TypeError: 'NoneType' object is not iterable

In [37]:
MCTSnode.simulate(None)

AttributeError: 'NoneType' object has no attribute 'game'

In [38]:
filter

filter

In [39]:
lst = [1, 2, 3]
list is []

False

In [40]:
l1 = []
l2 = []
l1 is l2

False

In [41]:
2 / 3

0.6666666666666666

In [42]:
max([1, 2, 56, 4, 3])

56

In [43]:
False
Falsea = [1, 2, 5, 6, 7, 10, 12, 2, 3]
a
l = [ (x, i) for (i, x) in list(enumerate(a)) ]
max(l)

NameError: name 'a' is not defined

In [44]:
# I think instead of representing the board state, I should just represent a list of moves because it's easier to read.
# it would be nice to be able to handle transpositions with memoization...
#   that'd be easier if we DO store board state...
#   maybe we shouldn't store a whole move list, we should just store a "move" field. then we can quickly construct move lists anway by just traversing the tree.

In [45]:
g = MCTSnode(game(np.array([[0, 0, 0], [0, 2, 0], [0, 0, 0]])))
g.iterate(3000)
print_direct_children(g)

NameError: name 'print_direct_children' is not defined

In [83]:
def AIgame(player=1, iterations=1000):
    assert player == 1 or player == 2, "Player can only be 1 or 2"

    g = game()
    # opponent = game.opponent
    
    while g.playing:
        print(g)
        if g.playing == player:
            x, y = None, None
            while (x, y) == (None, None):
                _x, _y = input("Input two numbers (separated by space)").split(' ')
                try:
                    (x, y) = int(_x), int(_y)
                except ValueError:
                    "Invalid input"
            g.play((x, y))
        else:
            #MCTS = MCTSnode(g.copy(), player=g.opponent)
            MCTS = MCTSnode(g.copy(), player=g.playing)
            MCTS.iterate(iterations)
            #print(MCTS)
            g.play(MCTS.best_move())

    print(g)
    #print(f'{g.winner} wins' if g.winner else 'Draw')
        

In [99]:
AIgame(player=2, iterations=10000)


[[0 0 0]
 [0 0 0]
 [0 0 0]]
current player: 1

[[0 0 0]
 [0 1 0]
 [0 0 0]]
current player: 2


Input two numbers (separated by space) 0 0



[[2 0 0]
 [0 1 0]
 [0 0 0]]
current player: 1

[[2 0 1]
 [0 1 0]
 [0 0 0]]
current player: 2


Input two numbers (separated by space) 2 2



[[2 0 1]
 [0 1 0]
 [0 0 2]]
current player: 1

[[2 0 1]
 [0 1 0]
 [0 1 2]]
current player: 2


KeyboardInterrupt: Interrupted by user

In [47]:
# I think, because I'm not copying self._open_moves (or whatever) in copy, I might be getting children which don't exist... ???
# not sure if that's right...

In [48]:
m = MCTSnode(game().play((0, 0)), player=2)
m.iterate(20000)
#m.print_direct_children()       
m.best_move()

(1, 1)

In [73]:
m = MCTSnode(game().play((0, 0)), player=2)
m.iterate(20000)
m


[[1 0 0], [0 0 0], [0 0 0]] self.total=16321, self.visits=20000 self.children=[
[[1 0 0], [0 0 0], [0 0 2]] self.total=-36, self.visits=97 self.children=[
[[1 0 0], [1 0 2], [0 0 2]] self.total=0, self.visits=2 self.children=[
[[1 1 0], [1 0 2], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 1 2], [1 0 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 1 0], [1 2 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 1 0], [1 0 2], [0 2 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 1 2], [2 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 2 0], [1 1 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 2], [1 1 2], [0 0 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 1 2], [0 2 2]] self.total=0, self.visits=0 self.children=None, 
[[1 0 0], [1 0 2], [2 1 2]] self.total=0, self.visits=0 self.children=None, 
[[1 2 0], [1 0 2], [0 1 2]] self.total=0, self.visits=0 self.children=None, 
[[