In [1]:
from tictactoe import Board2D, Board3D
import numpy as np
import copy

In [53]:
class Node:
    '''
    One position in the game tree.

    state      -> board object for this position; must provide
                  `current_player`, `get_legal_moves()` and `place(move)`
    parent     -> node this one was expanded from (None for a root)
    move       -> move that was placed to reach this position (None for a root)
    who_placed -> (state.current_player + 1) % 2
    children   -> nodes built by the latest get_children() call, [] before that
    '''

    def __init__(self, state, parent, move):
        self.state = state
        self.parent = parent
        self.move = move

        # NOTE(review): this only yields "the other player" when players are
        # encoded as 0/1. For a -1/+1 encoding the expression is always 0 --
        # confirm the board's encoding before relying on who_placed.
        self.who_placed = (self.state.current_player + 1) % 2
        self.children = []

    def _child_after(self, move):
        '''
        move -> child node reached by placing `move` on a copy of the state
        '''
        # Deep-copy first so this node's own board is never mutated.
        next_state = copy.deepcopy(self.state)
        next_state.place(move)
        return Node(state=next_state, parent=self, move=move)

    def get_children(self):
        '''
        Build one child per legal move, in get_legal_moves() order.

        The list is stored in self.children (replacing any previous list)
        and the same list object is returned.
        '''
        self.children = [
            self._child_after(move) for move in self.state.get_legal_moves()
        ]
        return self.children

In [54]:
class Agent:
    '''
    Common base for players: subclasses override evaluate() to pick a move.
    '''

    def __init__(self):
        '''
        The base agent keeps no state.
        '''

    def evaluate(self, board):
        '''
        board -> best move

        Placeholder implementation: ignores `board` and returns None.
        '''
        return None

In [55]:
def random_eval(board):
    '''
    board -> random score

    The argument is ignored; the score is a single draw from a normal
    distribution with mean 0 and standard deviation 1 (NumPy global RNG).
    '''
    noise = np.random.normal(loc=0.0, scale=1.0)
    return noise

In [56]:
class MiniMaxAgent(Agent):
    '''
    Agent that delegates move selection to a MiniMaxEvaluator.

    eval_fn -> scoring function handed to the evaluator (also kept on the agent)
    depth   -> search depth handed to the evaluator
    '''

    def __init__(self, eval_fn, depth):
        super().__init__()
        self.eval_fn = eval_fn
        self.evaluator = MiniMaxEvaluator(eval_fn=eval_fn, depth=depth)

    def evaluate(self, board):
        '''
        board -> best move

        The board itself (not a copy) becomes the state of a parentless
        root node, which is then passed to the evaluator.
        '''
        root = Node(state=board, parent=None, move=None)
        chosen_move = self.evaluator.evaluate(root)
        return chosen_move

In [57]:
class MiniMaxEvaluator:
    '''
    Depth-limited minimax search over game-tree nodes.

    eval_fn -> callable scoring a *node* (it is called as eval_fn(node));
               higher scores are better for the player choosing at the root
    depth   -> number of plies searched below each candidate move

    Nodes must provide `state` (with get_winner() and get_legal_moves()),
    `move` and `get_children()`.
    '''

    def __init__(self, eval_fn, depth):
        self.eval_fn = eval_fn
        self.depth = depth

    @staticmethod
    def _is_terminal(node):
        '''
        node -> True when the position has a winner or no legal moves remain
        '''
        state = node.state
        # list() so the emptiness test also works when moves come from an iterator.
        return state.get_winner() != 0 or not list(state.get_legal_moves())

    def minimax(self, node, depth, maximizing_player):
        '''
        node, remaining depth, side to optimise -> minimax value of node

        Returns eval_fn(node) once depth reaches 0 or the position is
        terminal; otherwise the max (maximizing_player) or min of the
        children's values, with the roles alternating at every ply.
        '''
        if depth == 0 or self._is_terminal(node):
            return self.eval_fn(node)

        pick = max if maximizing_player else min
        value = -np.inf if maximizing_player else np.inf
        for child in node.get_children():
            value = pick(
                value, self.minimax(child, depth - 1, not maximizing_player)
            )
        return value

    def evaluate(self, node):
        '''
        node -> best move

        Scores every child of `node` with minimax (the opponent moves next,
        so each child is a minimizing position) and returns the move of the
        highest-scoring child; the first such child wins ties. Returns None
        only when `node` has no children.

        Children come from node.get_children(), so they are parented to
        `node` and recorded in node.children.
        '''
        best_move = None
        best_eval = -np.inf
        for child in node.get_children():
            value = self.minimax(child, self.depth, False)

            if value > best_eval:
                best_move = child.move
                best_eval = value
            elif best_move is None:
                # Every score so far is -inf or NaN: still fall back to a
                # legal move instead of returning None.
                best_move = child.move

        return best_move

In [84]:
class TicTacToe:
    '''
    Plays two agents against each other on one shared board.

    board  -> board object; must provide reset(), get_winner(),
              get_legal_moves(), place(move) and `current_player`
    agent1 -> agent that moves whenever board.current_player == -1
    agent2 -> agent that moves otherwise
    '''

    def __init__(self, board, agent1, agent2):
        self.board_wrapper = board
        self.agent1 = agent1
        self.agent2 = agent2

    def _show(self, n_moves):
        '''
        Print the current board under a banner carrying the move counter.
        '''
        print(f'========= move {n_moves} =========')
        print(self.board_wrapper)
        print('=========        =========')

    def _has_legal_moves(self):
        '''
        -> True while at least one legal move remains on the board
        '''
        return len(list(self.board_wrapper.get_legal_moves())) > 0

    def sim_game(self, display=True):
        '''
        Reset the board and play one full game.

        display -> print the board before every move and once at the end

        Returns the value of board.get_winner() if it is non-zero,
        otherwise 'draw'.
        '''
        board = self.board_wrapper
        board.reset()
        n_moves = 0
        while board.get_winner() == 0 and self._has_legal_moves():
            if display:
                self._show(n_moves)

            # Use the agents given to this instance, not same-named globals.
            mover = self.agent1 if board.current_player == -1 else self.agent2
            board.place(mover.evaluate(board))
            n_moves += 1

        if display:
            self._show(n_moves)

        # Check the winner before declaring a draw: a win on the move that
        # exhausts the legal moves must not be reported as a draw.
        winner = board.get_winner()
        if winner != 0:
            return winner
        return 'draw'

In [90]:
# Two separate minimax agents with identical settings: positions are scored
# by random_eval and the search depth is 2.
agent1 = MiniMaxAgent(eval_fn=random_eval, depth=2)
agent2 = MiniMaxAgent(eval_fn=random_eval, depth=2)

In [91]:
# Board shared by all simulated games, built with board_size=3
# (presumably a 3x3 grid -- confirm against Board2D).
board = Board2D(board_size=3)

In [92]:
# Game driver pairing the board with the two agents.
t = TicTacToe(board=board, agent1=agent1, agent2=agent2)

In [95]:
# Outcome counters, keyed by the possible return values of sim_game.
res = dict.fromkeys((-1, 'draw', 1), 0)

# Play 100 games without printing and count each outcome.
for _ in range(100):
    outcome = t.sim_game(display=False)
    res[outcome] = res[outcome] + 1