In [37]:
import math
import random


class Node:
    """A node of the Monte Carlo search tree.

    Attributes:
        state: Game state represented by this node. It must provide
            ``get_legal_actions()`` and ``perform_action(action)``.
        parent: The node this one was expanded from, or ``None`` for a root.
        children: Child nodes, in the order they were expanded.
        wins: Sum of all results passed to :meth:`update`.
        visits: Number of times :meth:`update` has been called.
    """

    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        self.children = []
        self.wins = 0
        self.visits = 0

    def is_fully_expanded(self):
        """Return True when there is one child per legal action of the state."""
        return len(self.children) == len(self.state.get_legal_actions())

    def select_child(self, exploration=2.0):
        """Return the child with the highest UCB1 score.

        The score is ``wins / visits + sqrt(exploration * ln(N) / visits)``
        where ``N`` is this node's visit count.

        Args:
            exploration: Non-negative factor applied inside the square root.
                The default of 2.0 is the classic UCB1 constant; 0 disables
                exploration entirely.

        Returns:
            The selected child node. A child that has never been visited is
            returned immediately, since its score is undefined (division by
            zero) and it must be tried before it can be compared.

        Raises:
            ValueError: If the node has no children.
        """
        for child in self.children:
            if child.visits == 0:
                return child

        # Clamp to 1 so a parent that was never updated does not hit log(0).
        log_parent_visits = math.log(max(self.visits, 1))

        def ucb1(child):
            exploit = child.wins / child.visits
            explore = math.sqrt(exploration * log_parent_visits / child.visits)
            return exploit + explore

        return max(self.children, key=ucb1)

    def expand(self):
        """Create, attach and return the child for the next untried action.

        Actions are taken in the order returned by
        ``state.get_legal_actions()``; the number of existing children is
        used as the index of the next action to try.

        Raises:
            IndexError: If every legal action already has a child.
        """
        action = self.state.get_legal_actions()[len(self.children)]
        new_child = Node(self.state.perform_action(action), self)
        self.children.append(new_child)
        return new_child

    def update(self, result):
        """Record one playout: bump the visit count and add ``result`` to wins."""
        self.visits += 1
        self.wins += result


class TicTacToeState:
    """A Tic-Tac-Toe position together with the side to move.

    The board is a flat list of nine cells in row-major order, where 0 is an
    empty cell, 1 is player 1 and -1 is player 2. ``player`` is the player
    whose turn it is. ``perform_action`` never mutates the state it is called
    on; it returns a new state instead.
    """

    # Index triples forming a winning line; defined once rather than being
    # rebuilt on every check_winner() call.
    _WIN_LINES = (
        (0, 1, 2),
        (3, 4, 5),
        (6, 7, 8),  # Rows
        (0, 3, 6),
        (1, 4, 7),
        (2, 5, 8),  # Columns
        (0, 4, 8),
        (2, 4, 6),  # Diagonals
    )

    def __init__(self, board=None, player=1):
        """Create a state.

        Args:
            board: List of nine cell values, or ``None`` for an empty board.
                A supplied list is stored as-is (not copied).
            player: The player to move, 1 or -1.
        """
        self.board = board if board is not None else [0] * 9
        self.player = player

    def copy(self):
        """Return a new state with its own copy of the board."""
        return TicTacToeState(self.board.copy(), self.player)

    def get_legal_actions(self):
        """Return the indices of all empty cells, in ascending order."""
        return [i for i, val in enumerate(self.board) if val == 0]

    def perform_action(self, action):
        """Return the state reached when the current player marks ``action``.

        Args:
            action: Index of the cell to mark.

        Returns:
            A new state with the cell set to the current player and the
            other player to move.

        Raises:
            ValueError: If the cell is already occupied.
            IndexError: If ``action`` is outside the board.
        """
        if self.board[action] != 0:
            raise ValueError(f"cell {action} is already occupied")
        new_board = self.board.copy()
        new_board[action] = self.player
        return TicTacToeState(new_board, -self.player)

    def is_terminal(self):
        """Return True if someone has won or no empty cell remains."""
        return self.check_winner() != 0 or not self.get_legal_actions()

    def check_winner(self):
        """Return the winning player (1 or -1), or 0 if no line is complete."""
        cells = self.board
        for a, b, c in self._WIN_LINES:
            mark = cells[a]
            if mark != 0 and mark == cells[b] == cells[c]:
                return mark
        return 0

    def get_result(self, player):
        """Score the position for ``player``, assuming the game has ended.

        Returns:
            1 if ``player`` won, 0 if the other player won, 0.5 when there is
            no winner (draw).
        """
        winner = self.check_winner()
        if winner == 0:
            return 0.5  # Draw
        return 1 if winner == player else 0


def mcts(root_state, iterations=1000):
    """Run Monte Carlo Tree Search and return the most promising next state.

    Each iteration performs selection (UCB1), expansion of one new child, a
    uniformly random playout, and backpropagation of the playout result.

    Args:
        root_state: Position to search from. It must expose ``player``,
            ``is_terminal()``, ``get_legal_actions()``,
            ``perform_action(action)`` and ``get_result(player)``.
        iterations: Number of search iterations to run.

    Returns:
        The state of the root's most visited child. If the root has no
        children (it is terminal, or ``iterations`` is not positive),
        ``root_state`` itself is returned.
    """
    root_node = Node(root_state)
    root_player = root_state.player

    for _ in range(iterations):
        node = root_node
        state = node.state

        # Selection phase: descend while every action has already been tried.
        while not state.is_terminal() and node.is_fully_expanded():
            node = node.select_child()
            state = node.state

        # Expansion phase: add one new child unless the game is over here.
        if not state.is_terminal():
            node = node.expand()
            state = node.state

        # Simulation phase: random playout to the end of the game.
        while not state.is_terminal():
            state = state.perform_action(random.choice(state.get_legal_actions()))

        # Backpropagation phase. A node's statistics are read by its parent
        # when choosing among children, so each node is scored from the
        # viewpoint of the player to move in the parent (the player who made
        # the move leading to this node). Scoring everything for the root
        # player would make the opponent pick replies that help the root.
        while node is not None:
            parent = node.parent
            mover = parent.state.player if parent is not None else root_player
            node.update(state.get_result(mover))
            node = parent

    # Nothing was expanded: there is no move to recommend.
    if not root_node.children:
        return root_node.state

    # Return the child node with the most visits
    return max(root_node.children, key=lambda child: child.visits).state


# Example usage: search a mid-game position and show the chosen move.
if __name__ == "__main__":
    # Row-major 3x3 layout; 1 = player 1, -1 = player 2, 0 = empty.
    board = [
        1, 0, 1,
        -1, 1, 0,
        -1, 0, -1,
    ]
    player = -1  # player 2 is to move
    initial_state = TicTacToeState(board, player)
    best_move_state = mcts(initial_state, 1000)

    print("Best move board:", best_move_state.board)
    print("check_winner:", best_move_state.check_winner())

Best move board: [1, 0, 1, -1, 1, 0, -1, -1, -1]
check_winner: -1
