In [15]:
import numpy as np
import random
from game import Game, Move, Player
import math
from mcts_helper import get_legal_moves

In [4]:
class RandomPlayer(Player):
    """Baseline player that proposes a uniformly random position and direction.

    No legality check is done here: the proposal ignores the game state
    entirely, so the caller is expected to deal with unusable moves.
    """

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return a random ``(position, direction)`` pair.

        Args:
            game: current game; unused, present only to satisfy the
                ``Player`` interface.

        Returns:
            A tuple ``((a, b), direction)`` where ``a`` and ``b`` are drawn
            independently from 0..4 inclusive and ``direction`` is one of the
            four ``Move`` values.
        """
        # Draw order (first index, second index, direction) is kept fixed so
        # that seeded runs stay reproducible.
        first_index = random.randint(0, 4)
        second_index = random.randint(0, 4)
        directions = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        direction = random.choice(directions)
        return (first_index, second_index), direction

In [16]:
class MctsNode():
    """A single board state in a Monte Carlo Tree Search.

    Every node stores the board, the id of the player who moves next from it
    (``player_id``) and the id of the player the search is run for
    (``root_id``). Rewards are always accumulated from the root player's
    point of view: 1 for a root-player win, 0 for a loss, 0.5 otherwise.

    Public attributes:
        parent_move: the ``(position, direction)`` pair that led from the
            parent to this node (``None`` for the root).
        player_id: id of the player to move in this state.
        is_terminal: whether the game is already decided in this state.
        root_id: id of the player the search optimises for.
    """

    def __init__(self, board: np.ndarray, root_id, player_id, explor_param, is_terminal=False, parent=None, parent_move=None):
        """Create a node and pre-compute the moves that can still be expanded.

        Args:
            board: board state represented by this node.
            root_id: id of the player the whole search is run for.
            player_id: id of the player to move from ``board``.
            explor_param: exploration constant used in the UCB score.
            is_terminal: True when ``board`` is already a finished game.
            parent: parent node, or ``None`` for the root.
            parent_move: move applied to the parent's board to reach ``board``.
        """
        self.parent_move = parent_move
        self.player_id = player_id
        self.is_terminal = is_terminal
        self.root_id = root_id
        self._children: list[MctsNode] = []
        self._parent: MctsNode = parent
        self._board = board
        self._num_visits = 0
        self._winscore = 0
        self._explor_param = explor_param
        self._untried_moves = self.generate_moves(get_legal_moves(board.copy(), player_id))

    def get_board(self):
        """Return a copy of the board so callers cannot mutate the node."""
        return self._board.copy()

    def get_children(self):
        """Return a shallow copy of the list of expanded children."""
        return self._children.copy()

    def get_winscore(self):
        """Return the accumulated reward (root player's perspective)."""
        return self._winscore

    def get_num_visits(self):
        """Return how many simulations have been backed up through this node."""
        return self._num_visits

    def get_ucb(self):
        """Return the UCB1 score used by the parent to pick this node.

        The score is computed on demand from the current statistics, so it
        never goes stale when siblings are visited. Unvisited nodes and the
        root (which has no parent) score ``inf``.

        The stored reward is from the root player's perspective; when the
        parent is a node where the *opponent* moves, the mean is flipped
        (``1 - mean``) so that the opponent also picks what is best for them.
        """
        parent = self._parent
        visits = self._num_visits
        if parent is None or visits == 0:
            return np.inf

        mean_reward = self._winscore / visits
        if parent.player_id != parent.root_id:
            # Rewards live in [0, 1] and are symmetric, so the opponent's
            # expected reward is the complement of the root player's.
            mean_reward = 1.0 - mean_reward

        # max(..., 1) keeps log() defined even if stats were set up by hand.
        parent_visits = max(parent.get_num_visits(), 1)
        exploration = self._explor_param * math.sqrt(math.log(parent_visits) / visits)
        return mean_reward + exploration

    def generate_moves(self, legal_moves: "list[tuple[tuple[int, int], Move]]"):
        """Narrow ``legal_moves`` down to immediately winning moves, if any.

        Args:
            legal_moves: candidate ``(position, direction)`` pairs for the
                player to move in this node.

        Returns:
            The sub-list of moves for which ``Game.check_if_winning_move``
            is truthy, or ``legal_moves`` itself when no such move exists.
        """
        game = Game(self.get_board())
        winning_moves = [
            move for move in legal_moves
            if game.check_if_winning_move(move[0], move[1], self.player_id)
        ]
        return winning_moves if winning_moves else legal_moves

    def has_moves_left(self):
        """Return True while at least one move has not been expanded yet."""
        return len(self._untried_moves) > 0

    def expand(self):
        """Create, attach and return the child for one untried move.

        Returns:
            The new child node, or ``None`` when every move has already been
            expanded.
        """
        if not self.has_moves_left():
            return None

        move = self._untried_moves.pop()
        game = Game(self.get_board())
        game.move(move[0], move[1], self.player_id)
        next_state = game.get_board()

        # check_winner yields a value greater than -1 once the game is decided.
        winner = game.check_winner(next_state)
        game_over = (winner > -1)

        child = MctsNode(board=next_state,
                         root_id=self.root_id,
                         player_id=1 - self.player_id,
                         explor_param=self._explor_param,
                         is_terminal=game_over,
                         parent=self,
                         parent_move=move)
        self._children.append(child)
        return child

    def calculate_result(self, winner):
        """Map a winner id to a reward from the root player's perspective.

        Returns:
            1 if the root player won, 0 if the other player won, 0.5 for any
            other value of ``winner``.
        """
        if winner == self.root_id:
            return 1
        elif winner == 1 - self.root_id:
            return 0
        else:
            return 0.5

    def simulate(self):
        """Play the position out and return the reward for the root player.

        Terminal nodes are scored directly; otherwise two ``RandomPlayer``
        instances finish the game from this node's board.
        """
        board = self.get_board()
        game = Game(board)

        if self.is_terminal:
            winner = game.check_winner(board)
        else:
            # NOTE(review): assumes Game.play lets the player who is actually
            # to move on this board (self.player_id) go first - confirm
            # against the Game implementation.
            winner = game.play(RandomPlayer(), RandomPlayer())

        return self.calculate_result(winner)

    def backpropagate(self, result):
        """Add ``result`` to this node and every ancestor up to the root.

        Only the raw statistics are stored; the UCB score is derived from
        them in ``get_ucb`` so it always reflects the latest visit counts.
        """
        node = self
        while node is not None:
            node._winscore += result
            node._num_visits += 1
            node = node._parent

    def select_best_child(self):
        """Return the child with the highest UCB score (first one on ties)."""
        ucb_of_children = [c.get_ucb() for c in self._children]
        return self._children[np.argmax(ucb_of_children)]

    def selection(self):
        """Walk down the tree and return the node to simulate from.

        Starting at this node, follow the best-UCB child while nodes are
        fully expanded; as soon as a node with an untried move is reached,
        expand it and return the new child. Terminal nodes, and nodes that
        have neither untried moves nor children, are returned as they are.
        """
        current_node = self
        while not current_node.is_terminal:
            if current_node.has_moves_left():
                return current_node.expand()
            if not current_node._children:
                # Dead end: nothing to expand and nothing to descend into.
                return current_node
            current_node = current_node.select_best_child()
        return current_node

    def calculate_best_move(self):
        """Return the move of the child with the best mean reward.

        Raises:
            ValueError: if this node has no children (for example when it is
                terminal), because then there is no move to recommend.
        """
        if not self._children:
            raise ValueError("cannot pick a best move: node has no expanded children")

        # A child that was never visited has no estimate; rank it last
        # instead of dividing by zero.
        scores_of_children = [
            (c.get_winscore() / c.get_num_visits()) if c.get_num_visits() else -np.inf
            for c in self._children
        ]
        best_child = self._children[np.argmax(scores_of_children)]
        return best_child.parent_move

    def get_best_action(self, num_iter=1000):
        """Run ``num_iter`` MCTS iterations from this node and pick a move.

        Each iteration selects/expands a node, simulates from it and backs
        the reward up the tree.

        Returns:
            The ``parent_move`` of the best child, or ``None`` if selection
            unexpectedly produced no node.
        """
        for _ in range(num_iter):
            node = self.selection()
            if node is None:
                print("Something went wrong in the selection process.")
                return None
            result = node.simulate()
            node.backpropagate(result)

        return self.calculate_best_move()

In [17]:
class MyPlayer(Player):
    """Player that chooses each move with a fresh Monte Carlo Tree Search."""

    def __init__(self, player_id, num_iter=500, explore_param=math.sqrt(2)) -> None:
        """Store the search settings.

        Args:
            player_id: id this player has in the game; used both as the
                search's root player and as the player to move at the root.
            num_iter: number of MCTS iterations run for every move.
            explore_param: exploration constant handed to each ``MctsNode``.
        """
        super().__init__()
        self._player_id = player_id
        self._num_iter = num_iter
        self._explor_param = explore_param

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Search from the current board and return the chosen move.

        A new tree is built on every call; nothing is reused between moves.
        """
        search_root = MctsNode(
            game.get_board(),
            root_id=self._player_id,
            player_id=self._player_id,
            explor_param=self._explor_param,
        )
        chosen = search_root.get_best_action(self._num_iter)
        position = chosen[0]
        direction = chosen[1]
        return position, direction

In [23]:


# Play one game: RandomPlayer is passed first, the MCTS player second (id 1).
# The board starts filled with -1. It is built with np.full on a signed dtype
# because `np.ones(..., dtype=np.uint8) * (-1)` raises OverflowError on
# NumPy >= 2; int16 is what that expression produced on NumPy 1.x.
init_state = np.full((5, 5), -1, dtype=np.int16)
g = Game(init_state)
player1 = RandomPlayer()
player2 = MyPlayer(player_id=1, num_iter=100)
winner = g.play(player1, player2, print_num_turns=True)
print(f"Winner: Player {winner}")

Winner: Player 1


In [24]:
NUM_GAMES = 50  # number of evaluation games; can also be lowered to shorten the run
EXPLORE_PARAM = math.sqrt(2)  # works best for rewards in [0, 1] range
PLAYER_ID = 0  # the MCTS player is passed first to Game.play below

# 500 iterations seems to be pretty good. More iterations play better but take
# longer, so when judging the agent adjust NUM_ITER to keep the runtime
# acceptable on your machine. MCTS inherently needs a fair amount of
# computation per move.
NUM_ITER = 500
wins = 0
player1 = MyPlayer(PLAYER_ID, NUM_ITER, EXPLORE_PARAM)
player2 = RandomPlayer()

for i in range(NUM_GAMES):
    # Fresh board filled with -1. A signed dtype is required: uint8 * -1
    # raises OverflowError on NumPy >= 2 (and gave int16 on NumPy 1.x).
    init_state = np.full((5, 5), -1, dtype=np.int16)
    g = Game(init_state)
    winner = g.play(player1, player2)
    print(f"Winner: Player {winner}")
    # Count a win only when the reported winner is our id; this stays correct
    # even if play() ever returns something other than 0 or 1.
    wins += int(winner == PLAYER_ID)

Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 1
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 0
Winner: Player 1


In [25]:
# Percentage of the evaluation games above that were counted as wins.
win_ratio = (100 * wins) / NUM_GAMES
print(f"Win ratio as player1: {win_ratio}%")

Win ratio as player1: 93.33333333333333%


In [26]:
NUM_GAMES = 30  # number of evaluation games
EXPLORE_PARAM = math.sqrt(2)
PLAYER_ID = 1  # the MCTS player is passed second to Game.play below
NUM_ITER = 300  # MCTS iterations per move

wins = 0
player2 = MyPlayer(PLAYER_ID, NUM_ITER, EXPLORE_PARAM)
player1 = RandomPlayer()

for i in range(NUM_GAMES):
    # Fresh board filled with -1. A signed dtype is required: uint8 * -1
    # raises OverflowError on NumPy >= 2 (and gave int16 on NumPy 1.x).
    init_state = np.full((5, 5), -1, dtype=np.int16)
    g = Game(init_state)
    winner = g.play(player1, player2)
    print(f"Winner: Player {winner}")
    # Count a win only when the reported winner is our id; this stays correct
    # even if play() ever returns something other than 0 or 1.
    wins += int(winner == PLAYER_ID)

Winner: Player 0
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 0
Winner: Player 0
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 0
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1
Winner: Player 1


In [27]:
# Percentage of the evaluation games above that were counted as wins.
win_ratio = (100 * wins) / NUM_GAMES
print(f"Win ratio as player2: {win_ratio}%")

Win ratio as player2: 86.66666666666667%
