In [117]:
from tictactoe import Board2D, Board3D, TicTacToe # all tictactoe / gamelogic stuff is in tictactoe.py
from sim import simulate # sim.py is where the simulations lie
from agent import RandomAgent # whenever you finalize an agent, add it to agents.py!

import numpy as np

In [175]:
board = Board2D(board_size=3) # this defines a 3x3 gameboard, i.e. the original gameboard

Below I will define the agents I am going to use

In [3]:
from agent import MiniMaxAgent

In [34]:
def dist(loc1, loc2):
    return np.square(loc1 - loc2).sum()

def dist_from_center(loc, board):
    center = np.asarray([board.shape[0]//2, board.shape[1]//2])
    return dist(np.asarray(loc), center)

def center_eval(node, player):
    board = node.state
    score = 0
    # i.e. gets the player we're evaluating the perspective of
    my_pieces = zip(*np.where(board.board == player))
    for loc in my_pieces:
        score -= dist_from_center(loc, board.board)
    return score + np.random.normal()

In [176]:
def two_in_a_row_eval(node, player):
    board = node.state
    score = 0
    # i.e. gets the player we're evaluating the perspective of
    my_pieces = zip(*np.where(board.board == player))
    
    if(board.get_winner() == player):
        score = 1000
    elif(board.get_winner() == -player):
        score = -1000
     
    for i in range(board.board.shape[0]):
        if sum(board.board[i]) == 2*player:
            score += 10
    for j in range(board.board.shape[1]):
        if sum(board.board[:,j]) == 2*player:
            score += 10
    if sum(np.diag(board.board)) == 2*player:
        score += 10
    if sum(np.diag(np.fliplr(board.board))) == 2*player:
        score += 10
    
    for i in range(board.board.shape[0]):
        if sum(board.board[i]) == -2*player:
            score -= 10
    for j in range(board.board.shape[1]):
        if sum(board.board[:,j]) == -2*player:
            score -= 10
    if sum(np.diag(board.board)) == -2*player:
        score -= 10
    if sum(np.diag(np.fliplr(board.board))) == -2*player:
        score -= 10
    
    return score + np.random.normal()
    # iterate through columns and check if they are 2 and an open color
    # iterate through rows and check if they are 2 and an open color
    # do this with diagonals as well

In [183]:
def simple(node, player):
    if(board.get_winner() == player):
        score = 1000000
    elif(board.get_winner() == -player):
        score = -1000000
    else:
        score = 0
    return score

In [184]:
MMagent0 = MiniMaxAgent(two_in_a_row_eval, name = 'MiniMaxAgentTiar', depth=0)
MMagent1 = MiniMaxAgent(two_in_a_row_eval, name = 'MiniMaxAgentTiar', depth=1)
MMagent2 = MiniMaxAgent(two_in_a_row_eval, name = 'MiniMaxAgentTiar', depth=2)
MMagent3 = MiniMaxAgent(two_in_a_row_eval, name = 'MiniMaxAgentTiar', depth=3)
MMagent4 = MiniMaxAgent(two_in_a_row_eval, name = 'MiniMaxAgentTiar', depth=4)
MMagent0_center = MiniMaxAgent(center_eval, name = 'MiniMaxAgentCenter', depth=0)
MMagent1_center = MiniMaxAgent(center_eval, name = 'MiniMaxAgentCenter', depth=1)
MMagent2_center = MiniMaxAgent(center_eval, name = 'MiniMaxAgentCenter', depth=2)
MMagent3_center = MiniMaxAgent(center_eval, name = 'MiniMaxAgentCenter', depth=3)
MMsimple0 = MiniMaxAgent(simple, name = 'Simple', depth=0)
MMsimple1 = MiniMaxAgent(simple, name = 'Simple', depth=1)
MMsimple2 = MiniMaxAgent(simple, name = 'Simple', depth=2)
MMsimple3 = MiniMaxAgent(simple, name = 'Simple', depth=3)
rand = RandomAgent(name='rand')

In [179]:
simple0_res = simulate(board, rand, MMsimple0, num_sims=1000)
simple0_res

{'draw': 218, 'rand': 217, 'Simple': 565}

In [180]:
simple1_res = simulate(board, rand, MMsimple1, num_sims=1000)
simple1_res

{'draw': 213, 'rand': 249, 'Simple': 538}

In [181]:
simple2_res = simulate(board, rand, MMsimple2, num_sims=1000)
simple2_res

{'draw': 211, 'rand': 233, 'Simple': 556}

In [185]:
simple3_res = simulate(board, rand, MMsimple3, num_sims=1000)
simple3_res

{'draw': 220, 'rand': 238, 'Simple': 542}

In [90]:
tiarVScenter_res = simulate(board, MMagent2_center, MMagent2, num_sims=100)
tiarVScenter_res

{'draw': 0, 'MiniMaxAgentCenter': 24, 'MiniMaxAgentTiar': 76}

In [91]:
tiarVScenter_res4x4 = simulate(Board2D(board_size=4), MMagent2_center, MMagent2, num_sims=100)
tiarVScenter_res4x4

{'draw': 0, 'MiniMaxAgentCenter': 6, 'MiniMaxAgentTiar': 94}

In [92]:
tiarVScenter_res5x5 = simulate(Board2D(board_size=5), MMagent2_center, MMagent2, num_sims=100)
tiarVScenter_res5x5

{'draw': 15, 'MiniMaxAgentCenter': 45, 'MiniMaxAgentTiar': 40}

In [93]:
#tiarVScenter_res6x6 = simulate(Board2D(board_size=6), MMagent2_center, MMagent2, num_sims=100)
#tiarVScenter_res6x6

In the last three simulations we tried our two in a row evaluation minimax function versus a center evaluation minimax function. We see that for a 3x3 the two in a row evaluation minimax function performs really well in a 3x3 board winning in 76/100 simulations. It performs even better in a 4x4 board winning in 94/100 simulations.

In [99]:
tiarVScenter0_res = simulate(board, MMagent0_center, MMagent0, num_sims=100)
tiarVScenter0_res

{'draw': 3, 'MiniMaxAgentCenter': 17, 'MiniMaxAgentTiar': 80}

In [100]:
tiarVScenter1_res = simulate(board, MMagent1_center, MMagent1, num_sims=100)
tiarVScenter1_res

{'draw': 6, 'MiniMaxAgentCenter': 23, 'MiniMaxAgentTiar': 71}

In [101]:
tiarVScenter2_res = simulate(board, MMagent2_center, MMagent2, num_sims=100)
tiarVScenter2_res

{'draw': 0, 'MiniMaxAgentCenter': 21, 'MiniMaxAgentTiar': 79}

In [102]:
tiarVScenter3_res = simulate(board, MMagent3_center, MMagent3, num_sims=100)
tiarVScenter3_res

{'draw': 0, 'MiniMaxAgentCenter': 39, 'MiniMaxAgentTiar': 61}

In [113]:
tiar0_res = simulate(board, rand, MMagent0, num_sims=1000)
tiar0_res

{'draw': 59, 'rand': 42, 'MiniMaxAgentTiar': 899}

In [110]:
tiar1_res = simulate(board, rand, MMagent1, num_sims=1000)
tiar1_res

{'draw': 90, 'rand': 62, 'MiniMaxAgentTiar': 848}

In [87]:
tiar2_res = simulate(board, rand, MMagent2, num_sims=1000)
tiar2_res

{'draw': 38, 'rand': 39, 'MiniMaxAgentTiar': 923}

In [88]:
tiar3_res = simulate(board, rand, MMagent3, num_sims=1000)
tiar3_res

{'draw': 24, 'rand': 98, 'MiniMaxAgentTiar': 878}

In [89]:
tiar4_res = simulate(board, rand, MMagent4, num_sims=1000)
tiar4_res

{'draw': 35, 'rand': 65, 'MiniMaxAgentTiar': 900}

Prior to explaining these results let me explain the evaluation function I made and used for these simulations. My function values boards where you have 3 in a row very high and boards where the opponent has three in a row very low. This is obvious because those are obviously wins/losses. The more unique part of this is when we value us having 2 in a row/column/diagonal with an open third spot and devalue when the opponent has that. This is because having 2 in a row/column/diagonal with an open third spot opens the opportunity to win if you can get that third part. The random agent on the other hand just chooses a random open spot. The center evaluation function favors spaces closer to the center. The reason I believe my function will work well in 3x3 boards because we aim to put ourselves in positions to win by getting the 2 in the same row/column/diagonal where there is an open space. The open space would be a winning move so the more of these we get the more winning moves we will have and the more likely we are to win. While this alone is a solid strategy, our evaluation function also accounts for how close our opponent is to winning. If they are close to winning we value that board extremely low so we make sure that board does not occur.

Results:

The depth 0,1,2,3,4 minimax function with the two in a row evaluation method resulted in win rates of 89.9%, 84.8%, 92.3%, 87.8%, and 90.0% respectively versus a random win rate. This shows a very interesting result that shows a flaw in this evaluation function. The flaw is that it works better for even depths then odd depths. Furthermore, increasing the depth seems to help only a little bit. One hypothesis to why this evaluation function yields this result is that this function gets less useful as the board gets more full because there will be less cases in which you can get 2 in the same row/column/diagonal with an empty slot. It also may weigh the possibility of getting 2 in a row too heavily. For example, it might try and get 2 in an edge row/column instead of the middle. One way we could improve this is favoring center pieces as well.

We also tested the two in a row evaluation function versus the center evaluation funciton with minimax on 3x3, 4x4, and 5x5 matrices. The results of this showed that for 3x3 and 4x4 the two in a row evaluation worked better, however, the center evaluation method worked better for 5x5. This leads me to hypothesize that for higher dimension boards the two in a row function won't work as will. This makes sense because at higher dimension boards, trying to get 2 in a row is only a fraction of what you are looking for. For example, it will aim to just get a bunch of rows/columns/diagonals with 2 in it instead of trying to get n in the same row. Basically, this evaluation function does not scale. One way that it may be able to scale to higher dimension tic-tac-toe is if you replaced 2 with n-1. However, this wouldn't work that well because it would be hard to calculate a score when you are not close at all to getting n-1 in a row/column/diagonal.

All in all, this two in a row evaluation function had a win percentage over 84% for all simulated depths versus a random agent. Also, the two in a row evaluation function performed much better than the center evaluation function on the 3x3 board at depth 2. It proved to be a good and efficient evaluation function for depth 2 however it needs to be improved to work better at other depths and higher dimensions.





In [136]:
def num_two_in_a_row(board, player):
    if board.get_winner() == player:
        return 10000000
    elif board.get_winner() == -player:
        return -10000000
    
    score = 0
    
    player_locs = list(zip(*np.where(board == player)))
    for idx in range(len(player_locs)):
        for idx2 in range(idx, len(player_locs)):
            loc1 = np.asarray(player_locs[idx])
            loc2 = np.asarray(player_locs[idx2])
            if dist(loc1, loc2) <= np.sqrt(3):
                score += 1
                
    other_player_locs = list(zip(*np.where(board == player)))
    for idx in range(len(other_player_locs)):
        for idx2 in range(idx, len(other_player_locs)):
            loc1 = np.asarray(other_player_locs[idx])
            loc2 = np.asarray(other_player_locs[idx2])
            if dist(loc1, loc2) <= np.sqrt(3):
                score -= 1
            
    return score

In [205]:
def eval_for_3d(node, player):
    board = node.state
    player = board.pieces[(board.current_player+1)%2]
    return num_two_in_a_row(board, player) + (np.random.normal()/20)

In [206]:
def simple_eval_3d(node, player):
    if board.get_winner() == player:
        return 10000000
    elif board.get_winner() == -player:
        return -10000000
    else:
        return np.random.normal()

In [207]:
board = Board3D(board_size=3)

In [208]:
MMagent1_3d = MiniMaxAgent(eval_for_3d, name = 'MiniMaxAgentTiar_3d', depth=1)
MMagent1_3d_second = MiniMaxAgent(eval_for_3d, name = 'MiniMaxAgentTiar_3d the second', depth=1)
simpleMMagent_3d = MiniMaxAgent(simple_eval_3d, name = 'simpleMMagent_3d', depth=1)
simpleMMagent_3d_second = MiniMaxAgent(simple_eval_3d, name = 'simpleMMagent_3d the second', depth=1)

In [209]:
from tictactoe import TicTacToe

def simulate_oneway(board, agent1, agent2, num_sims, display=False):
    order = TicTacToe(board, agent1, agent2)
    
    sim = {
        'draw': 0,
        agent1.name: 0,
        agent2.name: 0,
    }

    for _ in range(num_sims):
        res = order.sim_game(display=display)        
        sim[res] = sim.get(res, 0) + 1

    return sim

In [210]:
tiar1_3d_res = simulate(board, rand, MMagent1_3d, num_sims=100)
tiar1_3d_res

{'draw': 0, 'rand': 6, 'MiniMaxAgentTiar_3d': 94}

In [211]:
simple_3d_res = simulate(board, rand, simpleMMagent_3d, num_sims=100)
simple_3d_res

{'draw': 0, 'rand': 6, 'simpleMMagent_3d': 94}

In [213]:
tiar1_simple_3d_res = simulate_oneway(board, simpleMMagent_3d, MMagent1_3d, num_sims=100)
tiar1_simple_3d_res

{'draw': 0, 'simpleMMagent_3d': 58, 'MiniMaxAgentTiar_3d': 42}

In [215]:
tiar1_simple_3d_res2 = simulate_oneway(board, MMagent1_3d, simpleMMagent_3d, num_sims=100)
tiar1_simple_3d_res2

{'draw': 0, 'MiniMaxAgentTiar_3d': 64, 'simpleMMagent_3d': 36}

In [217]:
sampleVSsample_3d_res = simulate_oneway(board, simpleMMagent_3d_second, simpleMMagent_3d, num_sims=100)
sampleVSsample_3d_res

{'draw': 0, 'simpleMMagent_3d the second': 60, 'simpleMMagent_3d': 40}

During this experiment, I used 3 different agents: random, 2 in a row, and a simple one for the minimax function. Against random agents, the 2 in a row and simple one both won with a 94% win rate. Now we put them up against each other. When 2 in a row started against the simple agent it won 64% of the time while the simple agent only won 58% of the time when it started. This however is not statistically significant enough to determine that the 2 in a row is a better agent.