In [223]:
import math
import random

class TicTacToeState:
    def __init__(self):
        self.board = [[' ']*3 for _ in range(3)]
        self.player = 'X'
    
    def get_legal_moves(self):
        moves = []
        for i in range(3):
            for j in range(3):
                if self.board[i][j] == ' ':
                    moves.append((i, j))
        return moves
    
    def make_move(self, move):
        i, j = move
        new_state = TicTacToeState()
        new_state.board = [row[:] for row in self.board]
        new_state.board[i][j] = self.player
        new_state.player = 'O' if self.player == 'X' else 'X'
        return new_state
    
    def get_winner(self):
        rows = self.board + [[self.board[i][j] for i in range(3)] for j in range(3)] + [[self.board[i][i] for i in range(3)]] + [[self.board[i][2-i] for i in range(3)]]
        for row in rows:
            if row == ['X']*3:
                return -1
            elif row == ['O']*3:
                return 1
        if all([self.board[i][j] != ' ' for i in range(3) for j in range(3)]):
            return 0
        return None

class Node:
    def __init__(self, state):
        self.state = state
        self.parent = None
        self.children = []
        self.wins = 0
        self.visits = 0
        self.usedMove = set()
    def fully_expanded(self):
        moves = self.state.get_legal_moves()
        return len(self.children) == len(moves)
    
    def best_child(self):
        best = None
        for child in self.children:
            score = child.wins / child.visits + (2 * math.sqrt(math.log(self.visits) / child.visits) )
            # print(score)
            if best is None or score > best_score:
                best = child
                best_score = score
        # print(best_score)
        return best
    
    def add_child(self, child_state,move):
        child = Node(child_state)
        child.parent = self
        self.children.append(child)
        self.usedMove.add(move)
        
    def update(self, result):
        self.visits += 1
        if result == -1 and self.state.player =="X":
            self.wins+=1
        elif result == 0:
            return
        elif result == 1 and self.state.player=="O":
            self.wins+=1
        else:
            self.wins-=1
    
def random_playout(state):
    winner = state.get_winner()
    if winner is not None:
        return winner
    
    moves = state.get_legal_moves()
    move = random.choice(moves)
    new_state = state.make_move(move)
    return random_playout(new_state)

def monte_carlo_tree_search(state, iterations):
    root = Node(state)
    for i in range(iterations):
        node = root
        while node.fully_expanded() and node.children:
            node = node.best_child()
        if not node.fully_expanded():
            moves = node.state.get_legal_moves()
            move = random.choice(moves)
            while move in node.usedMove:
                move = random.choice(moves)
            node.add_child(node.state.make_move(move),move)
            # print(node.usedMove)
            node = node.children[-1]
            
        
        result = random_playout(node.state)
        while node is not None:
            node.update(result)
            print(node.state.board,node.visits,node.wins)
            # result = -result
            node = node.parent
    # for i in root.children:
    #     print(i.visits,i.wins)
    #     print(i.state.board)
    return max(root.children, key=lambda c:c.visits).state

def print_board(board):
    print("  0 1 2")
    for i in range(3):
        print(f"{i} {board[i][0]}|{board[i][1]}|{board[i][2]}")
        if i < 2:
            print("  -+-+-")
def main():
    state = TicTacToeState()
    while True:
        print_board(state.board)
        winner = state.get_winner()
        if winner is not None:
            if winner == 1:
                print("X wins!")
            elif winner == -1:
                print("O wins!")
            else:
                print("Tie!")
            break
        if state.player == 'X':
            move = input("X move (row col): ")
            i, j = map(int, move.split())
            state = state.make_move((i, j))
        else:
            print("O is thinking...")
            state = monte_carlo_tree_search(state, 1000)
    print_board(state.board)

if __name__ == '__main__':
    main()

In [227]:


def main():
    state = TicTacToeState()
    while True:
        print_board(state.board)
        winner = state.get_winner()
        if winner is not None:
            if winner == 1:
                print("X wins!")
            elif winner == -1:
                print("O wins!")
            else:
                print("Tie!")
            break
        if state.player == 'X':
            move = input("X move (row col): ")
            i, j = map(int, move.split())
            state = state.make_move((i, j))
        else:
            print("O is thinking...")
            state = monte_carlo_tree_search(state, 1000)
    print_board(state.board)

if __name__ == '__main__':
    main()


  0 1 2
0  | | 
  -+-+-
1  | | 
  -+-+-
2  | | 
  0 1 2
0  | | 
  -+-+-
1  |X| 
  -+-+-
2  | | 
O is thinking...
[[' ', ' ', ' '], ['O', 'X', ' '], [' ', ' ', ' ']] 1 1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 1 -1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', ' ', 'O']] 1 1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 2 -2
[[' ', ' ', ' '], [' ', 'X', ' '], ['O', ' ', ' ']] 1 1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 3 -3
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', 'O', ' ']] 1 1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 4 -4
[[' ', 'O', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 1 1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 5 -5
[[' ', ' ', 'O'], [' ', 'X', ' '], [' ', ' ', ' ']] 1 1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 6 -6
[['O', ' ', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 1 1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ', ' ', ' ']] 7 -7
[[' ', ' ', ' '], [' ', 'X', 'O'], [' ', ' ', ' ']] 1 1
[[' ', ' ', ' '], [' ', 'X', ' '], [' ',

ValueError: not enough values to unpack (expected 2, got 0)