In [1]:
from tictactoe import Board2D, TicTacToe
from sim import simulate
import matplotlib.pyplot as plt
from agent import RandomAgent
import numpy as np

In [2]:
rand1 = RandomAgent(name='rand1')
rand2 = RandomAgent(name='rand2')

board = Board2D(board_size=3)

In [3]:
order1 = TicTacToe(board, rand1, rand2)
order2 = TicTacToe(board, rand2, rand1)

sim = {
    'draw': 0,
    rand1.name: 0,
    rand2.name: 0,
}

boards = []

for _ in range(100):
    res1, board1 = order1.sim_game(return_boards=True)
    res2, board2 = order2.sim_game(return_boards=True)

    sim[res1] = sim.get(res1, 0) + 1
    sim[res2] = sim.get(res2, 0) + 1
    
    boards.extend(board1)
    boards.extend([-x for x in board2])

In [4]:
boards = np.asarray(boards)

## Rand Heatmap

In [5]:
mask = [list(zip(*np.where(board != -1))) for board in boards if list(zip(*np.where(board != -1)))]

In [6]:
boards_masked = boards.copy()
for idx in range(len(boards)):
    for m in mask[idx]:
        boards_masked[idx][m] = 0

In [7]:
mean = np.mean(boards_masked, axis=0)

In [8]:
mean/np.sum(mean) # normalized distrib

array([[0.09780047, 0.11429694, 0.11665357],
       [0.10919089, 0.12175962, 0.11704635],
       [0.11311862, 0.11390416, 0.09622938]])

## Center-seeking heatmap

In [9]:
from eval_functions import my_eval_function
from agent import MiniMaxAgent

In [10]:
agent1 = MiniMaxAgent(my_eval_function, depth=2)

order1 = TicTacToe(board, agent1, rand2)
order2 = TicTacToe(board, rand2, agent1)

sim = {
    'draw': 0,
    rand1.name: 0,
    rand2.name: 0,
}

boards = []

for _ in range(100):
    res1, board1 = order1.sim_game(return_boards=True)
    res2, board2 = order2.sim_game(return_boards=True)

    sim[res1] = sim.get(res1, 0) + 1
    sim[res2] = sim.get(res2, 0) + 1
    
    boards.extend(board1)
    boards.extend([-x for x in board2])

In [11]:
mask = [list(zip(*np.where(board != -1))) for board in boards if list(zip(*np.where(board != -1)))]

In [12]:
boards_masked = boards.copy()
for idx in range(len(boards)):
    for m in mask[idx]:
        boards_masked[idx][m] = 0

In [13]:
mean = np.mean(boards_masked, axis=0)
mean/np.sum(mean) # normalized distrib

array([[0.01466594, 0.12873438, 0.01249321],
       [0.12004345, 0.47963064, 0.11569799],
       [0.01792504, 0.09451385, 0.01629549]])

## Two-in-a-row Heatmap

In [14]:
def num_two_in_a_row(board, player):
    if board.get_winner() == player:
        return 10000000
    elif board.get_winner() == -player:
        return -10000000
    
    score = 0
    
    player_locs = list(zip(*np.where(board == player)))
    for idx in range(len(player_locs)):
        for idx2 in range(idx, len(player_locs)):
            loc1 = np.asarray(player_locs[idx])
            loc2 = np.asarray(player_locs[idx2])
            if dist(loc1, loc2) <= np.sqrt(3):
                score += 1
                
    other_player_locs = list(zip(*np.where(board == player)))
    for idx in range(len(other_player_locs)):
        for idx2 in range(idx, len(other_player_locs)):
            loc1 = np.asarray(other_player_locs[idx])
            loc2 = np.asarray(other_player_locs[idx2])
            if dist(loc1, loc2) <= np.sqrt(3):
                score -= 1
            
    return score

In [15]:
def eval_twos(node, player):
    board = node.state
    return num_two_in_a_row(board, player)

In [16]:
agent1 = MiniMaxAgent(eval_twos, depth=2)

order1 = TicTacToe(board, agent1, rand2)
order2 = TicTacToe(board, rand2, agent1)

sim = {
    'draw': 0,
    rand1.name: 0,
    rand2.name: 0,
}

boards = []

for _ in range(100):
    res1, board1 = order1.sim_game(return_boards=True)
    res2, board2 = order2.sim_game(return_boards=True)

    sim[res1] = sim.get(res1, 0) + 1
    sim[res2] = sim.get(res2, 0) + 1
    
    boards.extend(board1)
    boards.extend([-x for x in board2])

In [17]:
mask = [list(zip(*np.where(board != -1))) for board in boards if list(zip(*np.where(board != -1)))]
boards_masked = boards.copy()
for idx in range(len(boards)):
    for m in mask[idx]:
        boards_masked[idx][m] = 0
mean = np.mean(boards_masked, axis=0)
mean/np.sum(mean) # normalized distrib

array([[0.49019608, 0.20468675, 0.09182209],
       [0.04638929, 0.05643233, 0.04112865],
       [0.02725968, 0.02104256, 0.02104256]])