In [3]:
import numpy as np

class TicTacToe:
    def __init__(self, grid_size):
        self.size = grid_size
        self.grid = np.zeros((grid_size, grid_size))
        self.valid_plays = [(i, j) for i, row in enumerate(self.grid) 
                            for j, value in enumerate(row) if value == 0]
    
    def play(self, player, position):
        
        if player not in [-1,1]:
            print('Player not allowed')
            return
        
        if self.check_status() == 1:
            print('The game is over')
            return
        
        if position not in self.valid_plays:
            print('Play not allowed! Try again')
            return
        
        if np.sum(self.grid == -1) == np.sum(self.grid == 1) + 1 and player == 0:
            print('Player 0 cannot play twice in a row')
            return
        
        if np.sum(self.grid == 1) == np.sum(self.grid == -1) and player == 1:
            print('Player 1 cannot play twice in a row')
            return
        
        self.grid[position] = player
        self.valid_plays.remove(position)
        
        if self.check_status():
            print(f'Player {player} Won')
            return 
        
    def check_status(self):
        
        val = len(self.grid)
        if val in np.sum(self.grid, axis = 0) or -val in np.sum(self.grid, axis = 0):
            print(val)
            print('VERTICAL WIN')
            return 1
        
        if val in np.sum(self.grid, axis = 1) or -val in np.sum(self.grid, axis = 1):
            print('HORIZONTAL WIN')
            return 1
        
        if np.trace(self.grid) == val or np.trace(self.grid) == -val:
            print('DIAGONAL WIN')
            return 1
        
        if np.trace(np.fliplr(self.grid)) == val or np.trace(np.fliplr(self.grid)) == -val:
            print('DIAGONAL WIN')
            return 1

        return 0
    
    def display_board(self):

        for i, row in enumerate(self.grid):

            row_display = " | ".join('X' if cell == 1 else 'O' if cell == -1 else ' ' for cell in row)
            
            print(" " + row_display + " ")

            if i < len(self.grid) - 1:
                print("---+" * (self.size - 1) + "---")
            
            
            

In [4]:
a = TicTacToe(9)

In [5]:
print(a.grid)

[[0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [6]:
a.play(-1, (0, 0))

In [7]:
a.grid

array([[-1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

In [8]:
a.display_board()

 O |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   


In [9]:
a.play(1, (1,1))

In [10]:
a.display_board()

 O |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   | X |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   


In [11]:
a.play(-1, (0,2))

In [12]:
a.play(1, (2,2))

In [13]:
a.display_board()

 O |   | O |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   | X |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   | X |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   


In [14]:
a.play(-1, (0, 1))

In [15]:
a.display_board()

 O | O | O |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   | X |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   | X |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   


In [16]:
grid = np.array([[1, -1, 0],
        [0, 0, 0],
        [0, 0, 0]])

seen_grids = [np.array([[0, -1, 1],
               [0, 0, 0],
               [0, 0, 0]])]

check_doubles(grid, seen_grids)

NameError: name 'check_doubles' is not defined

In [None]:
def build_symmetry_class(grid):
    cls = [grid,
                  np.rot90(grid, k=1),
                  np.rot90(grid, k=2),
                  np.rot90(grid, k=3),
                  grid.T,
                  np.rot90(grid.T, k=1),
                  np.rot90(grid.T, k=2),
                  np.rot90(grid.T, k=3)]
    return cls

def check_doubles(grid, seen_grids):
    for element in seen_grids:
        if any(np.array_equal(grid, candidate) for candidate in build_symmetry_class(element)):
            return 1
    return 0

    
    
def compute_possible_boards(grid, possible_grids):
    
    if np.sum(grid == 0) == 0:
        possible_grids.append(grid)
        print('GAME ALREADY OVER 1')
        return 
    val = len(grid)
    if val in np.sum(grid, axis = 0) or -val in np.sum(grid, axis = 0):
        possible_grids.append(grid)
        print('GAME ALREADY OVER 2')
        return
        
    
    if val in np.sum(grid, axis = 1) or -val in np.sum(grid, axis = 1):
        possible_grids.append(grid)
        print('GAME ALREADY OVER 3')
        return
        
    
    if np.trace(grid) == val or np.trace(grid) == -val:
        possible_grids.append(grid)
        print('GAME ALREADY OVER 4')
        return
        
    
    if np.trace(np.fliplr(grid)) == val or np.trace(np.fliplr(grid)) == -val:
        possible_grids.append(grid)
        print('GAME ALREADY OVER 5')
        return
        

    if np.sum(grid == -1) == np.sum(grid == 1):
        player = -1
    else:
        player = 1
    rows, cols = np.where(grid == 0)
    
    target_zero_count = np.count_nonzero(grid == 0) - 1

    sublist = [arr for arr in possible_grids if np.count_nonzero(arr == 0) == target_zero_count]
    
    for r, c in zip(rows, cols):
        new_grid = grid.copy()
        new_grid[r, c] = player
        if not check_doubles(new_grid, sublist):
            compute_possible_boards(new_grid, possible_grids)
        
    return possible_grids
    
        

In [None]:
grid = np.array([[0, 0, 0], 
                [0, 0, 0],
                [0, 0, 0]])
possible_grids = compute_possible_boards(grid, [])
possible_grids

GAME ALREADY OVER 5
GAME ALREADY OVER 1
GAME ALREADY OVER 4
GAME ALREADY OVER 1
GAME ALREADY OVER 1
GAME ALREADY OVER 4
GAME ALREADY OVER 4
GAME ALREADY OVER 5
GAME ALREADY OVER 2
GAME ALREADY OVER 1
GAME ALREADY OVER 1
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 3
GAME ALREADY OVER 2
GAME ALREADY OVER 1
GAME ALREADY OVER 2
GAME ALREADY OVER 1
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 3
GAME ALREADY OVER 3
GAME ALREADY OVER 4
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 1
GAME ALREADY OVER 2
GAME ALREADY OVER 4
GAME ALREADY OVER 3
GAME ALREADY OVER 1
GAME ALREADY OVER 4
GAME ALREADY OVER 3
GAME ALREADY OVER 2
GAME ALREADY OVER 4
GAME ALREADY OVER 3
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 5
GAME ALREADY OVER 1
GAME ALREADY OVER 5
GAME ALREADY OVER 2
GAME ALREADY OVER 1
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 2
GAME ALREADY OVER 1


KeyboardInterrupt: 

In [None]:
def create_win_grids(size=3):
    winning_masks = []
    
    # Win over rows
    for row in range(size):
        mask = 0
        for col in range(size):
            mask += 1 << (row * size + col)
        winning_masks.append(mask) 
      
    # Win over columns  
    for col in range(size):
        mask = 0
        for row in range(size):
            mask += 1 << (row * size + col)
        winning_masks.append(mask) 
    
    # Win over main diag
    mask = 0
    for row in range(size):
        mask += 1 << (row * size + row)
    winning_masks.append(mask)
    # Win over anti diag
    mask = 0
    for row in range(size):
        mask += 1 << (row * size + (size - row - 1))
    winning_masks.append(mask)
    
    return winning_masks

winning_configurations = create_win_grids(size=3)

def is_win(grid, winning_configurations):
    player_1, player_2 = grid
    for config in winning_configurations:
        if (player_1 & config) == config:
            #print(1)
            return 1
        if (player_2 & config) == config:
            #print(2)
            return 1
    return 0


def is_full(grid, size):
    conf1, conf2 = grid
    if (conf1 | conf2) == (1 << (size * size)) - 1:
        return 1
    return 0


def play_move(grid, player, move):
    if (grid[0] | grid[1]) & (1 << move) != 0:
        print('MOVE NOT ALLOWED')
        return None
    if player == 0:
        return (grid[0] | (1 << move), grid[1])
    else:
        return (grid[0], grid[1] | (1 << move))
        

In [None]:
def compute_possible_grids(grid, player, possible_grids = None, seen_by_moves = None, size = 3):
    
    if possible_grids is None:
        possible_grids = []
    
    if seen_by_moves is None:
        seen_by_moves = {i: set() for i in range(size * size + 1)}
    
    #grid = canonical_form_bit(grid, size)
    
    conf1, conf2 = grid   
    n_moves = bin(conf1 | conf2).count('1')
    
    if grid in seen_by_moves[n_moves]:
        return possible_grids
    
    
    seen_by_moves[n_moves].add(grid)
    
    if is_win(grid, winning_configurations):
        possible_grids.append(grid)
        return possible_grids
    
    if is_full(grid, size):
        possible_grids.append(grid)
        return possible_grids
    
    for move in range(size ** 2):
        if (conf1 | conf2) & (1 << move) != 0:
            continue
        new_grid = play_move(grid, player, move)
        
        if new_grid is None:
            continue
        compute_possible_grids(new_grid, 1 - player, possible_grids, seen_by_moves, size)
    

    return possible_grids
        
    
    

In [None]:
size = 4
winning_configurations = create_win_grids(size=size)
a = compute_possible_grids((0, 0), 0, None, None, size)

In [None]:
len(a)

659392

In [None]:
def compute_possible_grids_with_dict(grid, player, possible_grids = None, seen_by_moves = None, size = 3):
    
    if possible_grids is None:
        possible_grids = []
    
    if seen_by_moves is None:
        seen_by_moves = {i: dict() for i in range(size * size + 1)}
        
    conf1, conf2 = grid   
    n_moves = bin(conf1 | conf2).count('1')
    
    if grid in seen_by_moves[n_moves]:
        return seen_by_moves
    
    seen_by_moves[n_moves][grid] = (None, None)
    
    if is_win(grid, winning_configurations):
        possible_grids.append(grid)
        seen_by_moves[n_moves][grid] = (None, -1)
        return seen_by_moves
    
    if is_full(grid, size):
        possible_grids.append(grid)
        seen_by_moves[n_moves][grid] = (None, 0)
        return seen_by_moves
    
    current_best_score = -1
    current_best_move = None
    for move in range(size ** 2):
        if (conf1 | conf2) & (1 << move) != 0:
            continue
        new_grid = play_move(grid, player, move)
        
        if new_grid is None:
            continue
        seen_by_moves = compute_possible_grids_with_dict(new_grid, 1 - player, possible_grids, seen_by_moves, size)
        
        move_score = - seen_by_moves[n_moves + 1][new_grid][1]
        if move_score > current_best_score:
            current_best_score = move_score
            current_best_move = move
    seen_by_moves[n_moves][grid] = (current_best_move, current_best_score)

    return seen_by_moves
        

In [None]:
size = 4
winning_configurations = create_win_grids(size=size)
a = compute_possible_grids_with_dict((0, 0), 0, None, None, size)

In [None]:
a

{0: {(0, 0): (0, 0)},
 1: {(1, 0): (1, 0),
  (2, 0): (0, 0),
  (4, 0): (0, 0),
  (8, 0): (0, 0),
  (16, 0): (0, 0),
  (32, 0): (0, 0),
  (64, 0): (0, 0),
  (128, 0): (0, 0),
  (256, 0): (0, 0),
  (512, 0): (0, 0),
  (1024, 0): (0, 0),
  (2048, 0): (0, 0),
  (4096, 0): (0, 0),
  (8192, 0): (0, 0),
  (16384, 0): (0, 0),
  (32768, 0): (0, 0)},
 2: {(1, 2): (2, 0),
  (1, 4): (1, 0),
  (1, 8): (1, 0),
  (1, 16): (1, 0),
  (1, 32): (1, 0),
  (1, 64): (1, 0),
  (1, 128): (1, 0),
  (1, 256): (1, 0),
  (1, 512): (1, 0),
  (1, 1024): (1, 0),
  (1, 2048): (1, 0),
  (1, 4096): (1, 0),
  (1, 8192): (1, 0),
  (1, 16384): (1, 0),
  (1, 32768): (1, 0),
  (2, 1): (2, 0),
  (2, 4): (0, 0),
  (2, 8): (0, 0),
  (2, 16): (0, 0),
  (2, 32): (0, 0),
  (2, 64): (0, 0),
  (2, 128): (0, 0),
  (2, 256): (0, 0),
  (2, 512): (0, 0),
  (2, 1024): (0, 0),
  (2, 2048): (0, 0),
  (2, 4096): (0, 0),
  (2, 8192): (0, 0),
  (2, 16384): (0, 0),
  (2, 32768): (0, 0),
  (4, 1): (1, 0),
  (4, 2): (0, 0),
  (4, 8): (0, 0),
  

In [None]:
bin0 = bin(15)[2:].zfill(3**2)
bin1 = bin(7)[2:].zfill(3**2)

bin0, bin1

('000001111', '000000111')

In [9]:
bin(1 << 0)[2:].zfill(3**2)[::-1]

'100000000'

In [10]:
bin0 = bin(15)[2:].zfill(3**2)[::-1]
bin1 = bin(6)[2:].zfill(3**2)[::-1]

rows = [[bin0[i:i+3] , bin1[i:i+3]] for i in range(0, 3**2, 3)]
rows

[['111', '011'], ['100', '000'], ['000', '000']]

In [11]:
bin0

'111100000'

In [12]:
bin1

'011000000'

In [6]:
from tictactoe_class import TicTacToe

a = TicTacToe(4)


In [7]:
a.PlayGame('Rocco', 'ESBot')

KeyboardInterrupt: 

In [1]:
bin(0 | 0).count('1')

0

In [5]:
thislist = ["apple", "banana", "cherry"]
thislist.remove("banana")
print(thislist)

['apple', 'cherry']


In [7]:
import numpy as np
l = [1,3,5]
np.array([child for child in l]).argmax()

2

In [105]:
from utils import *
import os
import numpy as np
import math
import random

class MCTSNode:
    def __init__(self, state, valid_moves, player, parent=None):
        """
        Each node represents a state in the game.
        - state: instance of TicTacToeState.
        - parent: parent node (None for root).
        - move: the move that led to this state.
        """
        self.state = state
        self.valid_moves = valid_moves
        self.player = player # player that have to move next

        self.parent = parent
        self.children = []

        self.N = 0
        self.V = 0
        self.ucb = math.inf

    def update_board(self, move):
        """Update the board with the given move."""

        if (self.state[0] | self.state[1]) & (1 << move) != 0:
            #raise ValueError('Invalid move')
            print('Invalid move')

        new_moves = self.valid_moves.copy()
        new_moves.remove(move)
        if self.player == 0:
            return (self.state[0] | (1 << move), self.state[1]), new_moves
        else:
            return (self.state[0], self.state[1] | (1 << move)), new_moves
        

    def best_child(self, exploration=1.41):
        """Select the child with the highest UCT (Upper Confidence Bound) value."""
        choices_weights = []
        for child in self.children:
            if child.visits == 0:
                choices_weights.append(float('inf'))
            else:
                exploitation = child.wins / child.visits
                exploration_term = exploration * math.sqrt(math.log(self.visits) / child.visits)
                choices_weights.append(exploitation + exploration_term)
        return self.children[choices_weights.index(max(choices_weights))]


In [145]:
class MCSTBot:

    def __init__(self, size, winning_configurations, player, n_iterations = 100):
        self.n_iterations = n_iterations
        self.name = 'MCSTBot'
        self.winning_configurations = winning_configurations
        self.size = size
        self.player = player

        self.root = None # starting configuration on which we start building the tree

    def __str__(self):
        return self.name
    
    def resources_left(self):
        return True
    
    def next_move(self, current_state, valid_moves):
        
        self.root = MCTSNode(current_state, valid_moves, self.player)

        for move in self.root.valid_moves:
            
            new_state, new_valid_moves = self.root.update_board(move)
            new_node = MCTSNode(new_state, new_valid_moves, 1 - self.root.player, self.root)
            print(new_state)
            self.root.children.append(new_node)

        
        ucb_scores = self._build_strategy()
        
        return self._select_best_move(ucb_scores)

    def _select_best_move(self, ucb_scores):

        return self.root.valid_moves[np.array(ucb_scores).argmax()]

    def _build_strategy(self):
        
        #while resources_left():
        for iteration in range(self.n_iterations):
            print(f'CURRENT UCB: {[compute_ucb(leaf) for leaf in self.root.children]}')
            print('SELECTING')
            leaf = self._select()
            print(f'SELECTED NODE: {leaf.state}\n')
            print('EXPANDING')
            leaf = self._expand(leaf)
            print(f'EXPANSION DONE')
            print(f'SIMULATING')
            result = self._simulate(leaf.state, leaf.valid_moves, leaf.player)
            print(f'SIMULATION OVER. RESULT: {result}')
            print(f'BACKPROPAGATING')
            self._backpropagate(leaf, result)

        print([compute_ucb(child) for child in self.root.children])
        print([leaf.V/leaf.N for leaf in self.root.children])
        print([leaf.N for leaf in self.root.children])
        return [compute_ucb(child) for child in self.root.children]
    
    def _select(self):
        """
        Select the best leaf node to expand.
        """

        leaf = self.root

        while leaf.children != []:
            
            best_child = np.array([compute_ucb(child) for child in leaf.children]).argmax()

            leaf = leaf.children[best_child]
        
        return leaf

    def _expand(self, leaf):
        """
        Expand the leaf node by adding all possible children.
        """

        if leaf.N == 0:
            print('LEAF NOT VISITED. NOT EXPANDING')
            return leaf
        print(f'LEAF: {leaf.state}')
        print(f'LEFT MOVES: {leaf.valid_moves}')
        for move in leaf.valid_moves:
            
            new_state, new_valid_moves = leaf.update_board(move)
            new_node = MCTSNode(new_state, new_valid_moves, 1 - leaf.player, leaf)

            leaf.children.append(new_node)

        return leaf.children[0]


    def _simulate(self, board, valid_moves, player):
        """ Rollout a game from the given node """

        print(f'board in simulation: {board}')
        print(f'valid moves {valid_moves}')
        if is_win(board, self.winning_configurations) and player == self.player:
            print('LOSS FOR PLAYER')
            return -1
        elif is_win(board, self.winning_configurations) and player != self.player:
            print('WIN FOR PLAYER')
            return 1
        elif is_full(board, self.size):
            print('TIE')
            return 0
        else:
            
            move = random.choice(valid_moves)
            next_player_board = board[player] | (1 << move)
            if player == 0:
                next_board = (next_player_board, board[1])
            else:
                next_board = (board[0], next_player_board)
            remaining_valid_moves = valid_moves.copy()
            remaining_valid_moves.remove(move)


            return self._simulate(next_board, remaining_valid_moves, 1 - player)


    def _backpropagate(self, node, result):
        """ Update the node statistics """

        node.N += 1
        node.V += result
        
        if node.parent is not None:
            self._backpropagate(node.parent, result)

    def print_tree(self, node, indent=0):
        print(" " * indent + str(node.status))
        # Recursively print each child, increasing the indentation.
        for child in node.children:
            self.print_tree(child, indent + 4)

def compute_ucb(leaf):
    if leaf.parent is None:
        print('CANNOT COMPUTE UCB FOR THE ROOT')
        return None
    if leaf.N == 0:
        return math.inf
    return leaf.V/leaf.N + 2* math.sqrt((2 * math.log(leaf.parent.N)) / leaf.N)

        


In [None]:
a = MCSTBot(3, create_win_grids(3), 0, 1000)
a.next_move((0, 0), [i for i in range(9)])

In [149]:
a.next_move((0, 0), [i for i in range(9)])

(1, 0)
(2, 0)
(4, 0)
(8, 0)
(16, 0)
(32, 0)
(64, 0)
(128, 0)
(256, 0)
CURRENT UCB: [inf, inf, inf, inf, inf, inf, inf, inf, inf]
SELECTING
SELECTED NODE: (1, 0)

EXPANDING
LEAF NOT VISITED. NOT EXPANDING
EXPANSION DONE
SIMULATING
board in simulation: (1, 0)
valid moves [1, 2, 3, 4, 5, 6, 7, 8]
board in simulation: (1, 128)
valid moves [1, 2, 3, 4, 5, 6, 8]
board in simulation: (3, 128)
valid moves [2, 3, 4, 5, 6, 8]
board in simulation: (3, 192)
valid moves [2, 3, 4, 5, 8]
board in simulation: (19, 192)
valid moves [2, 3, 5, 8]
board in simulation: (19, 224)
valid moves [2, 3, 8]
board in simulation: (27, 224)
valid moves [2, 8]
board in simulation: (27, 480)
valid moves [2]
LOSS FOR PLAYER
SIMULATION OVER. RESULT: -1
BACKPROPAGATING
CURRENT UCB: [-1.0, inf, inf, inf, inf, inf, inf, inf, inf]
SELECTING
SELECTED NODE: (2, 0)

EXPANDING
LEAF NOT VISITED. NOT EXPANDING
EXPANSION DONE
SIMULATING
board in simulation: (2, 0)
valid moves [0, 2, 3, 4, 5, 6, 7, 8]
board in simulation: (2, 16)
v

0