In [2]:
from tictactoe import Board3D, TicTacToe
from sim import simulate
from agent import RandomAgent, MiniMaxAgent
from eval_functions import dist
import numpy as np

In [3]:
rand1 = RandomAgent(name='rand1')
rand2 = RandomAgent(name='rand2')

board = Board3D(board_size=3)

In [4]:
order1 = TicTacToe(board, rand1, rand2)
order2 = TicTacToe(board, rand2, rand1)

sim = {
    'draw': 0,
    rand1.name: 0,
    rand2.name: 0,
}

boards = []

for _ in range(100):
    res1, board1 = order1.sim_game(return_boards=True)
    res2, board2 = order2.sim_game(return_boards=True)

    sim[res1] = sim.get(res1, 0) + 1
    sim[res2] = sim.get(res2, 0) + 1
    
    boards.extend(board1)
    boards.extend([-x for x in board2])

In [5]:
res1, board1 = order1.sim_game(return_boards=True)
res2, board2 = order2.sim_game(return_boards=True)

In [6]:
boards = np.asarray(boards)

In [7]:
mask = [list(zip(*np.where(board != -1))) for board in boards if list(zip(*np.where(board != -1)))]


In [8]:
boards_masked = boards.copy()

for idx in range(len(boards)):
    for m in mask[idx]:
        boards_masked[idx][m] = 0

In [9]:
mean = np.mean(boards_masked, axis=0)

In [10]:
mean/np.sum(mean)

array([[[0.04526244, 0.03449216, 0.02903885],
        [0.03312883, 0.03544649, 0.04335378],
        [0.02999318, 0.03653715, 0.04349012]],

       [[0.04321745, 0.04689843, 0.03940014],
        [0.03994547, 0.03435583, 0.03531016],
        [0.04185412, 0.02862986, 0.03544649]],

       [[0.03953647, 0.03408316, 0.03203817],
        [0.03694615, 0.03531016, 0.03067485],
        [0.0342195 , 0.03667348, 0.04471711]]])

In [11]:
simulate(Board3D(size=3), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 485, 'rand2': 515}

In [12]:
simulate(Board3D(size=4), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 491, 'rand2': 509}

In [13]:
simulate(Board3D(size=5), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 524, 'rand2': 476}

In [14]:
simulate(Board3D(size=6), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 506, 'rand2': 494}

In [15]:
simulate(Board3D(size=7), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 517, 'rand2': 483}

In [16]:
simulate(Board3D(size=8), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 479, 'rand2': 521}

In [17]:
simulate(Board3D(size=9), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 511, 'rand2': 489}

In [18]:
simulate(Board3D(size=10), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 497, 'rand2': 503}

In [19]:
simulate(Board3D(size=11), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 538, 'rand2': 462}

In [21]:
simulate(Board3D(size=20), rand1, rand2, num_sims=1000)

{'draw': 0, 'rand1': 489, 'rand2': 511}

In [22]:
def num_two_in_a_row(board, player):
    if board.get_winner() == player:
        return 10000000
    elif board.get_winner() == -player:
        return -10000000
    
    score = 0
    
    player_locs = list(zip(*np.where(board == player)))
    for idx in range(len(player_locs)):
        for idx2 in range(idx, len(player_locs)):
            loc1 = np.asarray(player_locs[idx])
            loc2 = np.asarray(player_locs[idx2])
            if dist(loc1, loc2) <= np.sqrt(3):
                score += 1
                
    other_player_locs = list(zip(*np.where(board == player)))
    for idx in range(len(other_player_locs)):
        for idx2 in range(idx, len(other_player_locs)):
            loc1 = np.asarray(other_player_locs[idx])
            loc2 = np.asarray(other_player_locs[idx2])
            if dist(loc1, loc2) <= np.sqrt(3):
                score -= 1
            
    return score

In [23]:
def eval_for_3d(node, player):
    board = node.state
    return num_two_in_a_row(board, player)

In [24]:
agent3d = MiniMaxAgent(eval_for_3d, depth=2, name='twos')

In [None]:
simulate(Board3D(size=3), rand1, agent3d, num_sims=100)

{'draw': 0, 'rand1': 1, 'twos': 99}