In [1]:
import numpy as np
import random
from game import Game, Move, Player
import math

In [2]:
def get_legal_moves(player_id: int, board: np.ndarray):
    """List every (position, slide) pair `player_id` may play on `board`.

    Only the 16 border cells of the 5x5 board are considered. A border cell
    can be picked up when its value is negative or equal to `player_id`.
    A picked-up cell may be slid in every direction except the one pointing
    off the edge it sits on, so edge cells get three slides and corners two.

    Position tuples are used to index `board` directly, i.e. position[0]
    selects along the first axis and position[1] along the second.
    NOTE(review): confirm Game.move reads positions in this same order.

    Args:
        player_id: id of the player to generate moves for.
        board: 5x5 array holding the current cell owners.

    Returns:
        A list of ((i, j), Move) tuples in random order (shuffled in place
        with the global `random` generator).
    """
    inner = (1, 2, 3)

    # Slides allowed per border cell. Insertion order matters: it fixes the
    # order of the list handed to random.shuffle, and therefore the result
    # obtained for a given RNG state.
    slides_by_cell = {}
    for i in inner:  # left column: up, down or right
        slides_by_cell[(i, 0)] = (Move.BOTTOM, Move.TOP, Move.RIGHT)
    for i in inner:  # right column: up, down or left
        slides_by_cell[(i, 4)] = (Move.BOTTOM, Move.TOP, Move.LEFT)
    for j in inner:  # top row: down, left or right
        slides_by_cell[(0, j)] = (Move.BOTTOM, Move.LEFT, Move.RIGHT)
    for j in inner:  # bottom row: up, left or right
        slides_by_cell[(4, j)] = (Move.TOP, Move.LEFT, Move.RIGHT)

    # Corners come after all edge cells.
    slides_by_cell[(0, 0)] = (Move.BOTTOM, Move.RIGHT)  # upper left
    slides_by_cell[(0, 4)] = (Move.BOTTOM, Move.LEFT)   # upper right
    slides_by_cell[(4, 0)] = (Move.TOP, Move.RIGHT)     # lower left
    slides_by_cell[(4, 4)] = (Move.TOP, Move.LEFT)      # lower right

    candidates = [
        (cell, slide)
        for cell, slides in slides_by_cell.items()
        if board[cell] < 0 or board[cell] == player_id
        for slide in slides
    ]

    random.shuffle(candidates)

    return candidates


In [3]:
class RandomPlayer(Player):
    """Baseline player that proposes a random cell and a random slide."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Draw a move uniformly at random, ignoring the state of `game`.

        Both coordinates are drawn from 0..4 inclusive, so interior cells and
        slides that are not allowed for the drawn cell can be proposed.
        NOTE(review): presumably the caller rejects such proposals and asks
        again - confirm against Game.play.

        Returns:
            ((a, b), Move): the drawn position and slide direction.
        """
        first_coord = random.randint(0, 4)
        second_coord = random.randint(0, 4)
        directions = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        return (first_coord, second_coord), random.choice(directions)

In [65]:
class MctsNode():
    """One node of a Monte Carlo Tree Search over game boards.

    A node holds the board reached by playing `parent_move` from `parent`;
    `player_id` is the player to move from that board (ids are 0 and 1).
    Rewards are accumulated from the point of view of `root_id`:
    +1 when `root_id` wins a playout, -1 when the other player wins,
    0 otherwise.

    Args:
        board: board state represented by this node.
        root_id: id of the player the search is run for.
        player_id: id of the player whose turn it is at this node.
        explor_param: weight C of the exploration bonus used in selection.
        is_terminal: True when `board` already has a winner.
        parent: node this one was expanded from (None for the root).
        parent_move: (position, Move) pair that led from `parent` here.
    """

    def __init__(self, board: np.ndarray, root_id, player_id, explor_param, is_terminal=False, parent=None, parent_move=None):
        self.board = board
        self.parent: MctsNode = parent
        self.parent_move = parent_move
        self.player_id = player_id
        self.children: list[MctsNode] = []
        self.num_visits = 0
        self.is_terminal = is_terminal
        self.root_id = root_id
        self._winscore = 0
        self._explor_param = explor_param
        # Moves not yet turned into children; consumed one at a time by expand().
        self._untried_moves = get_legal_moves(self.player_id, self.board)

    def has_moves_left(self):
        """Return True while at least one legal move has no child node yet."""
        return len(self._untried_moves) > 0

    def get_ucb(self):
        """Return the selection score of this node as seen from its parent.

        The score is `mean + C * sqrt(N / n)` where `n` is this node's visit
        count, `N` the parent's current visit count and `mean` the average
        playout reward for the player who moves at the parent. Unvisited
        nodes and the root score `inf`.

        The value is computed on demand so that it always reflects the
        parent's latest visit count, including for siblings that were not
        part of the most recent backpropagation.

        NOTE(review): canonical UCB1 uses sqrt(ln(N) / n); the exploration
        term originally written in this class is kept as is - confirm intent.
        """
        if self.parent is None or self.num_visits == 0:
            return np.inf

        mean = self._winscore / self.num_visits
        # _winscore is from root_id's perspective. When the opponent is the
        # one choosing among the parent's children, a good result for
        # root_id is a bad one for the chooser, so the sign is flipped.
        if self.parent.player_id != self.root_id:
            mean = -mean

        bonus = self._explor_param * math.sqrt(self.parent.num_visits / self.num_visits)
        return mean + bonus

    def expand(self):
        """Create, attach and return the child for one untried move.

        Returns None when every legal move already has a child.
        """
        if not self.has_moves_left():
            return None

        move = self._untried_moves.pop()
        # Game's constructor is not visible here; hand it a copy so that
        # playing the move cannot alter the board stored on this node.
        game = Game(self.board.copy())
        game.move(move[0], move[1], self.player_id)

        next_state = game.get_board()
        game_over = game.check_winner() > -1

        child = MctsNode(board=next_state,
                         root_id=self.root_id,
                         player_id=1 - self.player_id,
                         explor_param=self._explor_param,
                         is_terminal=game_over,
                         parent=self,
                         parent_move=move)

        self.children.append(child)
        return child

    def calculate_result(self, winner):
        """Map a winner id to a reward from `root_id`'s point of view.

        Returns 1 if `winner` is `root_id`, -1 if it is the other player and
        0 for any other value.
        """
        if winner == self.root_id:
            return 1
        elif winner == 1 - self.root_id:
            return -1
        else:
            return 0

    def simulate(self):
        """Evaluate this node and return the reward for `root_id`.

        Terminal nodes are scored directly from their board; otherwise a game
        between two RandomPlayer instances is played out from this board.
        """
        # Copy for the same reason as in expand(): keep the stored board intact.
        game = Game(self.board.copy())

        if self.is_terminal:
            winner = game.check_winner()
        else:
            # NOTE(review): play() is not told whose turn it is at this node;
            # confirm that Game starts the playout with self.player_id.
            winner = game.play(RandomPlayer(), RandomPlayer())

        return self.calculate_result(winner)

    def backpropagate(self, result):
        """Add `result` and one visit to this node and to every ancestor."""
        node = self
        while node is not None:
            node._winscore += result
            node.num_visits += 1
            node = node.parent

    def select_best_child(self):
        """Return the child with the highest selection score (first on ties)."""
        ucb_of_children = [c.get_ucb() for c in self.children]

        return self.children[int(np.argmax(ucb_of_children))]

    def selection(self):
        """Walk down the tree and return the node to simulate from.

        Starting at this node, the walk stops at the first node that is
        terminal, or that still has an untried move (in which case the newly
        expanded child is returned), or that has no children at all.
        Fully expanded nodes are passed through via select_best_child().
        """
        current_node = self
        while not current_node.is_terminal:
            if current_node.has_moves_left():
                return current_node.expand()
            if not current_node.children:
                # No legal move was ever available here: nothing to descend into.
                return current_node
            current_node = current_node.select_best_child()
        return current_node

    def calculate_best_move(self):
        """Return the move of the child with the best average reward.

        Children that were never visited rank last.

        Raises:
            ValueError: if this node has no children.
        """
        if not self.children:
            raise ValueError("cannot pick a best move: the node has no children")

        scores_of_children = [
            (c._winscore / c.num_visits) if c.num_visits else -np.inf
            for c in self.children
        ]

        best_child = self.children[int(np.argmax(scores_of_children))]

        return best_child.parent_move

    def get_best_action(self, num_iter=1000):
        """Run `num_iter` search iterations from this node and return the best move.

        Each iteration selects a node, simulates from it and backpropagates
        the reward. Returns None (after printing a message) if selection
        yields no node.
        """
        for _ in range(num_iter):
            node = self.selection()
            if node is None:
                print("Something went wrong in the selection process.")
                return None
            result = node.simulate()
            node.backpropagate(result)

        return self.calculate_best_move()

In [66]:
class MyPlayer(Player):
    """Player that chooses each move with a fresh Monte Carlo Tree Search.

    Args:
        player_id: id this player has in the game; used both as the search
            root's perspective and as the player to move at the root.
        num_iter: number of MCTS iterations run per move.
        explor_param: exploration weight handed to the search tree.
    """

    def __init__(self, player_id, num_iter=1000, explor_param=0.1) -> None:
        super().__init__()
        self._num_iter = num_iter
        self._player_id = player_id
        self._explor_param = explor_param

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Search from the current board of `game` and return (position, Move).

        Raises:
            RuntimeError: if the search yields no move and no legal move exists.
        """
        root = MctsNode(game.get_board(),
                        root_id=self._player_id,
                        player_id=self._player_id,
                        explor_param=self._explor_param)
        best_move = root.get_best_action(self._num_iter)

        if best_move is None:
            # get_best_action reports a failed search with None; fall back to
            # any legal move instead of failing on the indexing below.
            fallback = get_legal_moves(self._player_id, game.get_board())
            if not fallback:
                raise RuntimeError("MCTS produced no move and no legal move is available")
            best_move = fallback[0]

        return best_move[0], best_move[1]

In [78]:
# Start from a board where every cell is -1 (get_legal_moves treats negative
# cells as free to take). The dtype is given explicitly as signed: multiplying
# a uint8 array by -1 only produced int16 through legacy value-based promotion
# and raises OverflowError under NumPy 2.
init_state = np.full((5, 5), -1, dtype=np.int16)
g = Game(init_state)
player1 = RandomPlayer()
player2 = MyPlayer(player_id=1, num_iter=1000)
winner = g.play(player1, player2)
g.print()
print(f"Winner: Player {winner}")

[[ 0  1  1  0  1]
 [ 1  1 -1  0 -1]
 [ 0  1 -1 -1  0]
 [ 0  1  0 -1  0]
 [ 1  1  0  0  0]]
Winner: Player 1
