In [164]:
"""
Tic Tac Toe Player
"""

import math
import numpy as np
import copy

X = "X"
O = "O"
EMPTY = None


def initial_state():
    """
    Returns starting state of the board.
    """
    return [[EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY]]

def numerical_board(board):
    """
    Returns a board where X=1, O=-1, EMPTY=0, as a numpy array.
    """
    for row in range(3):
        for value in range(3):
            if board[row][value] == X:
                board[row][value] = 1
            if board[row][value] == None:
                board[row][value] = 0
            if board[row][value] == O:
                board[row][value] = -1
    return np.array(board)

def player(board):
    """
    Returns player who has the next turn on a board.
    """
    # raise NotImplementedError
    board = copy.deepcopy(board)
    num_board = numerical_board(board)
    if np.sum(num_board)==1:
        return O
    if np.sum(num_board)==0:
        return X

def actions(board):
    """
    Returns set of all possible actions (i, j) available on the board.
    """
    board = copy.deepcopy(board)
    
    num_board = numerical_board(board)
    
    actions = set()
    
    for row in range(3):
        for cell in range(3):
            if num_board[row][cell]==0:
                actions.add((row,cell))
                
    if np.all(num_board!=0):
        return None
                
    return actions


def result(board, action):
    """
    Returns the board that results from making move (i, j) on the board.
    """
    row = action[0]
    cell = action [1]
    
    result_board = copy.deepcopy(board)
    
#     determine what symbol will be placed
    symbol = player(board)
    
    if symbol == 'X':
        result_board[row][cell]=X
    
    if symbol == 'O':
        result_board[row][cell]=O
    
    return result_board


def winner(board):
    """
    Returns the winner of the game, if there is one.
    """
    board = copy.deepcopy(board)
    
    num_board = numerical_board(board)
    #   check rows for winner
    for row in range(3):
        if np.sum(num_board[row])==3:
            return X
        if np.sum(num_board[row])==-3:
            return O
#   check columns for winner
    for column in range(3):
        if np.sum(num_board.T[column])==3:
            return X
        if np.sum(num_board.T[column])==-3:
            return O
#   check diagonals for winner:
    if np.sum(np.diagonal(num_board))==3:
        return X
    if np.sum(np.diagonal(num_board))==-3:
        return O
    if np.sum(np.fliplr(num_board).diagonal())==3:
        return X
    if np.sum(np.fliplr(num_board).diagonal())==-3:
        return O

    else:
        return None


def terminal(board):
    """
    Returns True if game is over, False otherwise.
    """
    board = copy.deepcopy(board)
    
    num_board = numerical_board(board)
    
    if winner(board)==X or winner(board)==O:
        return True

    elif np.all(num_board!=0):
        return True

    else:
        return False


def utility(board):
    """
    Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
    """
    if winner(board)==X:
        return 1
    if winner(board)==O:
        return -1
    if winner(board)==None:
        return 0


def minimax(board):
    """
    Returns the optimal action for the current player on the board.
    """
    raise NotImplementedError

In [1]:
import math

X = "X"
O = "O"
EMPTY = None


def initial_state():
    """
    Returns starting state of the board.
    """
    return [[EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY]]

In [2]:
board = [['X', 'X', 'O'],
            ['O', EMPTY, 'X'],
            ['X', 'O', 'O']]
board

In [3]:
import numpy as np

In [4]:
def numerical_board(board):
    for row in range(3):
        for value in range(3):
            if board[row][value] == X:
                board[row][value] = 1
            if board[row][value] == None:
                board[row][value] = 0
            if board[row][value] == O:
                board[row][value] = -1
    return np.array(board)

In [5]:
num_board = numerical_board(board)
num_board

array([[ 1,  1, -1],
       [-1,  0,  1],
       [ 1, -1, -1]])

In [6]:
np.sum(num_board)

0

# Checking which player's turn 
## if np.sum(num_board)==1, then O turn
## if np.sum(num_board)==0, then X turn

In [7]:
def player_turn(board):
    if np.sum(board)==1:
        return O
    if np.sum(board)==0:
        return X

# Checking for end of game

In [8]:
# down each row, across each column, each diagonal
# other: check if all spots are filled

In [9]:
np.ndarray.flatten(num_board)[8]

-1

In [10]:
def is_terminal(board): #input should be an array of board
#   check if all spaces are filled
    for i in range(9):
        if np.ndarray.flatten(board)[i]==0:
            return False
    for i in range(9):
        if np.ndarray.flatten(board)[i]!=0:
            return True


In [11]:
num_board

array([[ 1,  1, -1],
       [-1,  0,  1],
       [ 1, -1, -1]])

In [12]:
is_terminal(num_board)

False

# If game end, check for winner:

In [13]:
def if_winner(board):
#   check rows for winner
    for row in range(3):
        if np.sum(board[row])==3:
            return X
        if np.sum(board[row])==-3:
            return O
#   check columns for winner
    for column in range(3):
        if np.sum(board.T[column])==3:
            return X
        if np.sum(board[row])==-3:
            return O
#   check diagonals for winner:
    if np.sum(np.diagonal(board))==3:
        return X
    if np.sum(np.diagonal(board))==-3:
        return O
    if np.sum(np.fliplr(board).diagonal())==3:
        return X
    if np.sum(np.fliplr(board).diagonal())==-3:
        return O
    
    else: 
        return None

In [14]:
test_board = [['X', 'O', 'X'],
            ['X', 'X', 'O'],
            ['O', 'O', 'X']]
test_board

[['X', 'O', 'X'], ['X', 'X', 'O'], ['O', 'O', 'X']]

In [15]:
test_board_num = numerical_board(test_board)
test_board_num

array([[ 1, -1,  1],
       [ 1,  1, -1],
       [-1, -1,  1]])

In [16]:
if_winner(test_board_num)

'X'

In [17]:
player_turn(test_board_num)

'O'

In [18]:
if_winner(test_board_num)

'X'

In [19]:
def test_terminal(board):
    num_board = numerical_board(board)
    return np.any(num_board==0)

In [20]:
test_terminal(test_board)

False

In [21]:
numerical_board(test_board)==0

array([[False, False, False],
       [False, False, False],
       [False, False, False]])

# ACTIONS FUNCTION 

In [76]:
test0 = [[EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY]]

test1 = [[X, EMPTY, EMPTY],
            [EMPTY, O, EMPTY],
            [X, EMPTY, EMPTY]]

test2 = [[X, O, X],
            [EMPTY, X, EMPTY],
            [O, EMPTY, O]]

# test3 = [[EMPTY, EMPTY, EMPTY],
#             [EMPTY, EMPTY, EMPTY],
#             [EMPTY, EMPTY, EMPTY]]

# test4 = [[EMPTY, EMPTY, EMPTY],
#             [EMPTY, EMPTY, EMPTY],
#             [EMPTY, EMPTY, EMPTY]]

In [30]:
def actions(board):
    num_board = numerical_board(board)
    
    actions = set()
    
    for row in range(3):
        for cell in range(3):
            if num_board[row][cell]==0:
                actions.add((row,cell))
                
    if np.all(num_board!=0):
        return None
                
    return actions

In [39]:
actions(test0)

{(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)}

In [32]:
actions(test1)

{(0, 1), (0, 2), (1, 0), (1, 2), (2, 1), (2, 2)}

In [33]:
actions(test2)

{(1, 0), (1, 2), (2, 1)}

# Result Function 

In [77]:
def symbol_board(num_board):
    """
    Returns a board where 1=X, -1=O, 0=EMPTY, as a list.
    """
    for row in range(3):
        for value in range(3):
            if num_board[row][value] == 1:
                num_board[row][value] = X
            if num_board[row][value] == 0:
                num_board[row][value] = None
            if num_board[row][value] == -1:
                bnum_oard[row][value] = O
    return num_board.tolist() 

In [78]:
import copy

In [84]:
def result(board, action):
    row = action[0]
    cell = action [1]
    
    result_board = copy.deepcopy(board)
    
#     determine what symbol will be placed
    symbol = player(board)
    
    if symbol == 'X':
        result_board[row][cell]=X
    
    if symbol == 'O':
        result_board[row][cell]=O
    
    return result_board

In [85]:
result(test1, (1,2))

[['X', None, None], [None, 'O', 'O'], ['X', None, None]]

# Max value, mix value functions

In [95]:
def max_value(board):
    if terminal(board):
        return utility(board)
    v = -np.inf
    
    for action in actions(board):
        value = v
        v = min_value(result(board,action))
        if v>value:
            print(action)
            print(v)
    return v

def min_value(board):
    if terminal(board):
        return utility(board)
    v = np.inf
    
    for action in actions(board):
        value = v
        v = max_value(result(board,action))
        if v<value:
            print(action)
            print(v)
    return v

In [96]:
max_value(test1)

(2, 2)
1
(2, 1)
1
(2, 1)
0
(0, 2)
0
(2, 2)
1
(0, 2)
1
(0, 2)
0
(2, 1)
1
(0, 2)
1
(0, 2)
1
(1, 0)
1
(1, 0)
1
(2, 2)
1
(1, 0)
1
(1, 0)
1
(2, 1)
1
(1, 0)
1
(1, 0)
1
(1, 0)
1
(2, 2)
1
(1, 0)
1
(1, 0)
1
(0, 2)
1
(1, 0)
1
(1, 0)
1
(1, 0)
1
(2, 1)
0
(1, 0)
0
(1, 0)
1
(2, 1)
1
(0, 2)
0
(1, 0)
0
(1, 0)
1
(0, 2)
1
(1, 2)
1
(1, 0)
1
(1, 0)
-1
(1, 0)
1
(2, 2)
1
(1, 2)
1
(2, 2)
1
(0, 2)
1
(0, 2)
0
(1, 2)
0
(1, 2)
-1
(1, 2)
0
(2, 2)
0
(2, 2)
1
(1, 0)
1
(1, 0)
1
(1, 2)
1
(0, 2)
0
(1, 0)
0
(1, 0)
1
(0, 2)
1
(1, 2)
1
(1, 0)
1
(1, 2)
1
(1, 2)
0
(0, 2)
0
(2, 1)
0
(1, 0)
1
(1, 0)
-1
(1, 0)
1
(2, 1)
1
(1, 2)
1
(2, 1)
1
(0, 2)
1
(0, 2)
1
(1, 2)
1
(1, 2)
-1
(1, 2)
1
(2, 1)
1
(2, 1)
1
(1, 0)
1
(1, 0)
1
(1, 2)
1
(0, 2)
1
(1, 0)
1
(1, 0)
1
(1, 2)
1
(1, 0)
1
(1, 2)
1
(1, 2)
1
(1, 0)
1
(1, 0)
-1
(1, 0)
1
(2, 2)
1
(1, 0)
-1
(1, 0)
1
(2, 1)
1
(1, 2)
1
(2, 2)
1
(2, 1)
1
(2, 1)
0
(1, 2)
0
(1, 2)
-1
(1, 2)
0
(2, 2)
0
(1, 2)
-1
(1, 2)
1
(2, 1)
1
(2, 2)
1
(1, 0)
1
(1, 0)
1
(1, 2)
1
(1, 0)
1
(1, 2)
1
(1, 2)
1
(2, 1)
0
(1

1

In [149]:
def min_value(board):
    if terminal(board):
        return utility(board)
    
    value = np.inf
    move = (None, None)
    
    for action in actions(board):
        v = value
        value = max_value(result(board,action))[0]
        if v>value:
            move[0] = action[0]
            move[1] = action[1]
            
    return value, move

In [144]:
def max_value(board):
    if terminal(board):
        return utility(board)
    
    value = -np.inf
    move = (None, None)
    
    for action in actions(board):
        v = value
        value = min_value(result(board,action))[0]
        if value>v:
            move[0] = action[0]
            move[1] = action[1]
            
    return value, move

In [145]:
max_value(test1)

TypeError: 'tuple' object does not support item assignment

In [146]:
def MaxMin(board):
    if terminal(board):
        return (None, utility(board))
    value = float("-inf")
    move = None
    for action in actions(board):
        v = MinMax(result(board, action))[1]
        if v > value:
            value = v
            move = action
    return (move, value)

def MinMax(board):
    if terminal(board):
        return (None, utility(board))
    value = float("inf")
    move = None
    for action in actions(board):
        v = MaxMin(result(board, action))[1]
        if v < value:
            value = v
            move = action
    return (move, value)

In [148]:
MinMax(test1)

((1, 0), 0)

In [156]:
test1[0][0]

'X'

In [162]:
test4 = [[X, X, O],
            [X, X, O],
            [O, EMPTY, O]]

In [165]:
winner(test4)

'O'