In [152]:
import numpy as np

class TicTacToe:
    """N x N tic-tac-toe board stored as a square NumPy grid.

    Each cell holds 0 (empty), 1 (rendered as 'X') or -1 (rendered as 'O').
    Player -1 makes the first move and the two players must alternate.
    A player wins by filling an entire row, column or diagonal, which is
    detected by a line summing to +size or -size.
    """

    def __init__(self, grid_size):
        """Create an empty ``grid_size`` x ``grid_size`` board."""
        self.size = grid_size
        self.grid = np.zeros((grid_size, grid_size))
        # Every cell is empty at the start, listed in row-major order.
        self.valid_plays = [(i, j) for i in range(grid_size)
                            for j in range(grid_size)]

    def play(self, player, position):
        """Place ``player``'s mark at ``position`` if the move is legal.

        Args:
            player: -1 or 1.
            position: ``(row, col)`` tuple; must still be in ``valid_plays``.

        Illegal requests (unknown player, finished game, occupied or
        out-of-board cell, playing out of turn) are reported with a printed
        message and leave the board untouched. Always returns ``None``.
        """
        if player not in [-1, 1]:
            print('Player not allowed')
            return

        if self.check_status() == 1:
            print('The game is over')
            return

        if position not in self.valid_plays:
            print('Play not allowed! Try again')
            return

        minus_count = np.sum(self.grid == -1)
        plus_count = np.sum(self.grid == 1)

        # Player -1 opens the game, so it is one mark ahead right after its
        # own move and must then wait for player 1.
        if player == -1 and minus_count == plus_count + 1:
            print('Player -1 cannot play twice in a row')
            return

        # Equal counts mean it is player -1's turn (including the opening move).
        if player == 1 and plus_count == minus_count:
            print('Player 1 cannot play twice in a row')
            return

        self.grid[position] = player
        self.valid_plays.remove(position)

        if self.check_status():
            print(f'Player {player} Won')
            return

    def check_status(self):
        """Return 1 if some row, column or diagonal is fully owned by one player, else 0.

        Prints which kind of line was completed when a win is found.
        """
        val = len(self.grid)

        # A full line of 1s sums to +val, a full line of -1s to -val.
        column_sums = np.sum(self.grid, axis=0)
        if val in column_sums or -val in column_sums:
            print('VERTICAL WIN')
            return 1

        row_sums = np.sum(self.grid, axis=1)
        if val in row_sums or -val in row_sums:
            print('HORIZONTAL WIN')
            return 1

        if abs(np.trace(self.grid)) == val:
            print('DIAGONAL WIN')
            return 1

        # Flipping left-right turns the anti-diagonal into the main diagonal.
        if abs(np.trace(np.fliplr(self.grid))) == val:
            print('DIAGONAL WIN')
            return 1

        return 0

    def display_board(self):
        """Print the board as text, using 'X' for 1, 'O' for -1 and a blank for 0."""
        for i, row in enumerate(self.grid):

            row_display = " | ".join('X' if cell == 1 else 'O' if cell == -1 else ' ' for cell in row)

            print(" " + row_display + " ")

            # Separator between rows, but not after the last one.
            if i < len(self.grid) - 1:
                print("---+" * (self.size - 1) + "---")
            
            
            

In [4]:
a = TicTacToe(9)

In [5]:
print(a.grid)

[[0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [6]:
a.play(-1, (0, 0))

In [7]:
a.grid

array([[-1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

In [8]:
a.display_board()

 O |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   


In [9]:
a.play(1, (1,1))

In [10]:
a.display_board()

 O |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   | X |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   


In [11]:
a.play(-1, (0,2))

In [12]:
a.play(1, (2,2))

In [13]:
a.display_board()

 O |   | O |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   | X |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   | X |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   


In [14]:
a.play(-1, (0, 1))

In [15]:
a.display_board()

 O | O | O |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   | X |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   | X |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   
---+---+---+---+---+---+---+---+---
   |   |   |   |   |   |   |   |   


In [6]:
from tictactoe_class import TicTacToe

a = TicTacToe(3)


In [8]:
a.PlayGame('ESBot', 'MCTSBot')


Player 1: ESBot will play as X

Player 2: MCTSBot will play as O

Current Board:
   |   |   
---+---+---
   |   |   
---+---+---
   |   |   

Current Board:
 X |   |   
---+---+---
   |   |   
---+---+---
   |   |   

Current Board:
 X |   |   
---+---+---
   | O |   
---+---+---
   |   |   

Current Board:
 X | X |   
---+---+---
   | O |   
---+---+---
   |   |   

Current Board:
 X | X |   
---+---+---
 O | O |   
---+---+---
   |   |   

Player 1: ESBot will play as X
Player 2: MCTSBot will play as O

Final Board:
 X | X | X 
---+---+---
 O | O |   
---+---+---
   |   |   
Congratulations! ESBot wins! 🎉


In [1]:
bin(0 | 0).count('1')

0

In [5]:
thislist = ["apple", "banana", "cherry"]
thislist.remove("banana")
print(thislist)

['apple', 'cherry']


In [7]:
import numpy as np
l = [1,3,5]
np.array([child for child in l]).argmax()

2

In [105]:
from utils import *
import os
import numpy as np
import math
import random

class MCTSNode:
    def __init__(self, state, valid_moves, player, parent=None):
        """
        Each node represents a state in the game.
        - state: pair of integers ``(bits_player_0, bits_player_1)``; bit ``m``
          of an entry is set when that player occupies cell ``m``.
        - valid_moves: list of cell indices that can still be played.
        - player: index (0 or 1) of the player who moves next from this state.
        - parent: parent node (None for root).
        """
        self.state = state
        self.valid_moves = valid_moves
        self.player = player # player that have to move next

        self.parent = parent
        self.children = []

        self.N = 0  # number of times this node was visited
        self.V = 0  # sum of the results backpropagated through this node
        self.ucb = math.inf

    def update_board(self, move):
        """Return ``(new_state, new_valid_moves)`` after the current player plays ``move``.

        The node itself is not modified: the move list is copied before
        ``move`` is removed from it. A move onto an occupied cell is only
        reported with a printed message; ``list.remove`` then raises
        ``ValueError`` if ``move`` is not in ``valid_moves``.
        """

        if (self.state[0] | self.state[1]) & (1 << move) != 0:
            #raise ValueError('Invalid move')
            print('Invalid move')

        new_moves = self.valid_moves.copy()
        new_moves.remove(move)
        if self.player == 0:
            return (self.state[0] | (1 << move), self.state[1]), new_moves
        else:
            return (self.state[0], self.state[1] | (1 << move)), new_moves

    def best_child(self, exploration=1.41):
        """Select the child with the highest UCT (Upper Confidence Bound) value.

        Unvisited children score infinity so they are tried first. Ties go to
        the earliest child. Raises ``ValueError`` when the node has no children.
        """
        # Uses the node's own counters N (visits) and V (accumulated result).
        # log(0) is undefined, so an unvisited parent contributes no bonus.
        log_parent_visits = math.log(self.N) if self.N > 0 else 0.0

        choices_weights = []
        for child in self.children:
            if child.N == 0:
                choices_weights.append(math.inf)
            else:
                exploitation = child.V / child.N
                exploration_term = exploration * math.sqrt(log_parent_visits / child.N)
                choices_weights.append(exploitation + exploration_term)
        return self.children[choices_weights.index(max(choices_weights))]


In [145]:
class MCSTBot:
    """Monte Carlo Tree Search player for a bitboard tic-tac-toe game.

    A position is a pair ``(bits_player_0, bits_player_1)`` and a move is the
    index of the cell (bit) to occupy. Relies on ``MCTSNode``, ``compute_ucb``
    and the helpers ``is_win`` / ``is_full``, which must be in scope.
    """

    def __init__(self, size, winning_configurations, player, n_iterations = 100):
        """
        - size: board size, forwarded to ``is_full``.
        - winning_configurations: forwarded to ``is_win``.
        - player: index (0 or 1) of the side this bot plays.
        - n_iterations: number of select/expand/simulate/backpropagate rounds per move.
        """
        self.n_iterations = n_iterations
        self.name = 'MCSTBot'
        self.winning_configurations = winning_configurations
        self.size = size
        self.player = player

        self.root = None # starting configuration on which we start building the tree

    def __str__(self):
        return self.name

    def resources_left(self):
        """Placeholder budget check; always True (the search is bounded by ``n_iterations``)."""
        return True

    def next_move(self, current_state, valid_moves):
        """Build a fresh search tree from ``current_state`` and return the chosen move."""

        self.root = MCTSNode(current_state, valid_moves, self.player)

        # The root is always expanded up front: one child per legal move,
        # in the same order as root.valid_moves.
        for move in self.root.valid_moves:

            new_state, new_valid_moves = self.root.update_board(move)
            new_node = MCTSNode(new_state, new_valid_moves, 1 - self.root.player, self.root)
            print(new_state)
            self.root.children.append(new_node)

        ucb_scores = self._build_strategy()

        return self._select_best_move(ucb_scores)

    def _select_best_move(self, ucb_scores):
        """Return the root move whose child has the highest score.

        NOTE(review): the scores passed in are UCB values, which include the
        exploration bonus; picking by visit count or mean value is the more
        common final-move rule - confirm this is intended.
        """
        return self.root.valid_moves[np.array(ucb_scores).argmax()]

    def _build_strategy(self):
        """Run ``n_iterations`` MCTS rounds and return the UCB score of each root child."""

        #while resources_left():
        for iteration in range(self.n_iterations):
            print(f'CURRENT UCB: {[compute_ucb(leaf) for leaf in self.root.children]}')
            print('SELECTING')
            leaf = self._select()
            print(f'SELECTED NODE: {leaf.state}\n')
            print('EXPANDING')
            leaf = self._expand(leaf)
            print(f'EXPANSION DONE')
            print(f'SIMULATING')
            result = self._simulate(leaf.state, leaf.valid_moves, leaf.player)
            print(f'SIMULATION OVER. RESULT: {result}')
            print(f'BACKPROPAGATING')
            self._backpropagate(leaf, result)

        print([compute_ucb(child) for child in self.root.children])
        print([leaf.V/leaf.N for leaf in self.root.children])
        print([leaf.N for leaf in self.root.children])
        return [compute_ucb(child) for child in self.root.children]

    def _select(self):
        """
        Select the best leaf node to expand.

        Descends from the root, always following the child with the highest
        UCB score, until a node without children is reached.
        """

        leaf = self.root

        while leaf.children != []:

            best_child = np.array([compute_ucb(child) for child in leaf.children]).argmax()

            leaf = leaf.children[best_child]

        return leaf

    def _expand(self, leaf):
        """
        Expand the leaf node by adding all possible children.

        Returns the node to simulate from: the leaf itself when it has never
        been visited or when the game is already over there, otherwise the
        first newly created child.
        """

        if leaf.N == 0:
            print('LEAF NOT VISITED. NOT EXPANDING')
            return leaf

        # A finished game has no successors. Without this guard a full board
        # would crash on children[0], and a won board would get children whose
        # simulation reports the result with the side to move flipped.
        if not leaf.valid_moves or is_win(leaf.state, self.winning_configurations):
            print('LEAF IS TERMINAL. NOT EXPANDING')
            return leaf

        print(f'LEAF: {leaf.state}')
        print(f'LEFT MOVES: {leaf.valid_moves}')
        for move in leaf.valid_moves:

            new_state, new_valid_moves = leaf.update_board(move)
            new_node = MCTSNode(new_state, new_valid_moves, 1 - leaf.player, leaf)

            leaf.children.append(new_node)

        return leaf.children[0]


    def _simulate(self, board, valid_moves, player):
        """ Rollout a game from the given node

        ``player`` is the side to move on ``board``. Plays uniformly random
        moves until ``is_win`` or ``is_full`` reports the end of the game.
        Returns 1 for a win of this bot, -1 for a loss and 0 for a tie.
        """

        print(f'board in simulation: {board}')
        print(f'valid moves {valid_moves}')
        # A won board with the bot to move means the previous mover (the
        # opponent) completed the line - presumably is_win reports a win for
        # either side; verify against utils.
        if is_win(board, self.winning_configurations) and player == self.player:
            print('LOSS FOR PLAYER')
            return -1
        elif is_win(board, self.winning_configurations) and player != self.player:
            print('WIN FOR PLAYER')
            return 1
        elif is_full(board, self.size):
            print('TIE')
            return 0
        else:

            move = random.choice(valid_moves)
            next_player_board = board[player] | (1 << move)
            if player == 0:
                next_board = (next_player_board, board[1])
            else:
                next_board = (board[0], next_player_board)
            # Copy so the caller's (and the tree node's) move list is untouched.
            remaining_valid_moves = valid_moves.copy()
            remaining_valid_moves.remove(move)


            return self._simulate(next_board, remaining_valid_moves, 1 - player)


    def _backpropagate(self, node, result):
        """ Update the node statistics

        Adds one visit and ``result`` to ``node`` and to every ancestor up to
        the root. The same (bot-perspective) result is applied at every level.
        """

        node.N += 1
        node.V += result

        if node.parent is not None:
            self._backpropagate(node.parent, result)

    def print_tree(self, node, indent=0):
        """Print the state of ``node`` and of all its descendants, indented by depth."""
        print(" " * indent + str(node.state))
        # Recursively print each child, increasing the indentation.
        for child in node.children:
            self.print_tree(child, indent + 4)

def compute_ucb(leaf):
    """Upper-confidence-bound score of a non-root tree node.

    Returns ``None`` (after printing a notice) for the root, infinity for a
    node that has never been visited, and otherwise the node's mean value
    plus an exploration bonus that shrinks as the node is visited more.
    """
    parent = leaf.parent
    if parent is None:
        print('CANNOT COMPUTE UCB FOR THE ROOT')
        return None

    visits = leaf.N
    if visits == 0:
        return math.inf

    mean_value = leaf.V / visits
    exploration_bonus = 2 * math.sqrt((2 * math.log(parent.N)) / visits)
    return mean_value + exploration_bonus

        


In [None]:
# Build a bot with size 3 that plays as player 0 and runs 1000 search iterations per move.
# create_win_grids is not defined in this notebook - presumably it comes from `from utils import *`; verify.
a = MCSTBot(3, create_win_grids(3), 0, 1000)
# Ask for a move from the state (0, 0) (no bits set for either player) with cells 0..8 all available.
a.next_move((0, 0), [i for i in range(9)])

In [149]:
a.next_move((0, 0), [i for i in range(9)])

(1, 0)
(2, 0)
(4, 0)
(8, 0)
(16, 0)
(32, 0)
(64, 0)
(128, 0)
(256, 0)
CURRENT UCB: [inf, inf, inf, inf, inf, inf, inf, inf, inf]
SELECTING
SELECTED NODE: (1, 0)

EXPANDING
LEAF NOT VISITED. NOT EXPANDING
EXPANSION DONE
SIMULATING
board in simulation: (1, 0)
valid moves [1, 2, 3, 4, 5, 6, 7, 8]
board in simulation: (1, 128)
valid moves [1, 2, 3, 4, 5, 6, 8]
board in simulation: (3, 128)
valid moves [2, 3, 4, 5, 6, 8]
board in simulation: (3, 192)
valid moves [2, 3, 4, 5, 8]
board in simulation: (19, 192)
valid moves [2, 3, 5, 8]
board in simulation: (19, 224)
valid moves [2, 3, 8]
board in simulation: (27, 224)
valid moves [2, 8]
board in simulation: (27, 480)
valid moves [2]
LOSS FOR PLAYER
SIMULATION OVER. RESULT: -1
BACKPROPAGATING
CURRENT UCB: [-1.0, inf, inf, inf, inf, inf, inf, inf, inf]
SELECTING
SELECTED NODE: (2, 0)

EXPANDING
LEAF NOT VISITED. NOT EXPANDING
EXPANSION DONE
SIMULATING
board in simulation: (2, 0)
valid moves [0, 2, 3, 4, 5, 6, 7, 8]
board in simulation: (2, 16)
v

0