In [1]:
import numpy as np

In [2]:
class TicTacToe:
    """Tic-tac-toe board bundled with a linear value-function learner.

    The board is a 3x3 float array in which ``1`` marks the learning agent,
    ``-1`` marks the opponent and ``0`` marks an empty cell. A position is
    scored as ``dot(w, board.flatten())`` with one weight per cell.
    """

    def __init__(self):
        # 3x3 grid: 1 = agent, -1 = opponent, 0 = empty.
        self.board = np.zeros((3, 3))
        # One linear weight per board cell (row-major order).
        self.w = np.zeros(9)

    def reset(self):
        """Clear the board. The learned weights ``w`` are left untouched."""
        self.board = np.zeros((3, 3))

    def get_state(self):
        """Return the board as a flat, row-major copy of length 9."""
        return self.board.flatten()

    def get_valid_moves(self):
        """Return an array of ``[row, col]`` index pairs of the empty cells."""
        return np.argwhere(self.board == 0)

    def make_move(self, move, player):
        """Write ``player`` (``1`` or ``-1``) into cell ``(move[0], move[1])``.

        No legality check is performed; callers are expected to pass a cell
        taken from ``get_valid_moves``.
        """
        self.board[move[0], move[1]] = player

    @staticmethod
    def _line_sums(board):
        """Return the 8 line totals: 3 rows, 3 columns, both diagonals."""
        board = np.asarray(board)
        return np.concatenate((
            board.sum(axis=1),
            board.sum(axis=0),
            [np.trace(board), np.trace(np.fliplr(board))],
        ))

    def _wins_after(self, move, player):
        """Return True if ``player`` completes a line by playing ``move`` now."""
        trial = self.board.copy()
        trial[move[0], move[1]] = player
        return self.is_winning_move(trial, player)

    def game_over(self):
        """Return True when either side has three in a line or the board is full."""
        if np.any(np.abs(self._line_sums(self.board)) == 3):
            return True
        return not bool(np.any(self.board == 0))

    def get_reward(self):
        """Score the current position from the agent's (``+1``) point of view.

        Returns:
            ``100`` if the agent has three in a line, ``-100`` if the opponent
            does, ``-50`` if the opponent could complete a line with a single
            move from here, and ``0`` otherwise.
        """
        totals = self._line_sums(self.board)
        if np.any(totals == 3):
            return 100  # Reward for winning
        if np.any(totals == -3):
            return -100  # Penalty for losing

        # An unblocked opponent threat is penalised, but less than a loss.
        for move in self.get_valid_moves():
            if self._wins_after(move, -1):
                return -50

        # No immediate win or loss
        return 0

    def is_winning_move(self, board, player):
        """Return True if ``board`` contains a full line owned by ``player``.

        Args:
            board: 3x3 array using the same ``1`` / ``-1`` / ``0`` encoding.
            player: ``1`` or ``-1``.
        """
        return bool(np.any(self._line_sums(board) == player * 3))

    def update_weights(self, alpha, v_train, v_hat, x):
        """Apply one LMS step in place: ``w += alpha * (v_train - v_hat) * x``."""
        self.w += alpha * (v_train - v_hat) * x

    def v_hat(self, x):
        """Return the linear value estimate ``dot(w, x)`` for flat features ``x``."""
        return np.dot(self.w, x)

    def best_move(self, player=1):
        """Choose a move for ``player`` on the current board.

        Priority order:
          1. complete a line for ``player`` (immediate win),
          2. occupy the cell with which the other side would complete a line,
          3. otherwise pick the empty cell whose resulting position has the
             highest value estimate from ``player``'s perspective.

        The weights score boards from the ``+1`` side, so for ``player=-1``
        the candidate board is sign-flipped before it is evaluated.

        Args:
            player: side to move, ``1`` (default) or ``-1``.

        Returns:
            ``(move, value)``. ``move`` is a ``[row, col]`` array, or ``None``
            when the board has no empty cell. ``value`` is ``0`` for a forced
            win/block move, the value estimate for a greedy move, and
            ``-inf`` when there is no move.
        """
        moves = self.get_valid_moves()

        # Winning outright beats blocking: check our own line first.
        for move in moves:
            if self._wins_after(move, player):
                return move, 0

        # Otherwise deny the other side its winning cell.
        for move in moves:
            if self._wins_after(move, -player):
                return move, 0

        # No forced move: greedy choice by estimated value.
        best_v_hat = -np.inf
        best_move = None
        for move in moves:
            temp_board = self.board.copy()
            temp_board[move[0], move[1]] = player
            value = self.v_hat(player * temp_board.flatten())
            if value > best_v_hat:
                best_v_hat = value
                best_move = move

        return best_move, best_v_hat

    def train(self, alpha, epochs, seed=None):
        """Train the weights by self-play against a uniformly random opponent.

        Each step records the position in which the agent is about to move,
        plays the agent's move and the opponent's random reply, and then moves
        the estimate of the recorded position toward
          * ``get_reward()`` if the game has ended (win, loss or draw), or
          * the current estimate of the new position otherwise.
        Updating after the opponent's reply means games ended by the opponent
        also produce an update.

        Args:
            alpha: learning rate of the LMS update.
            epochs: number of games to play.
            seed: optional integer for a private random generator, making the
                run repeatable. With ``None`` the global ``np.random`` state
                is used.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)

        for _ in range(epochs):
            self.reset()
            while not self.game_over():
                state_before = self.get_state()

                move, _ = self.best_move()
                self.make_move(move, 1)

                if not self.game_over():
                    replies = self.get_valid_moves()
                    reply = replies[rng.randint(len(replies))]
                    self.make_move(reply, -1)

                if self.game_over():
                    target = self.get_reward()
                else:
                    target = self.v_hat(self.get_state())

                self.update_weights(
                    alpha, target, self.v_hat(state_before), state_before
                )

# Interactive drivers for the trained agent: human vs. agent, agent vs. agent, and a text menu.

def human_vs_agent(game):
    """Play one interactive game: the human (``-1``) moves first, the agent (``1``) replies.

    Moves are read from standard input as ``row,column`` with zero-based
    indices. Input that cannot be parsed, lies outside the board, or targets
    an occupied cell is rejected and the prompt is repeated. Negative indices
    are rejected explicitly, because numpy indexing would otherwise wrap them
    around to the far side of the board.

    Args:
        game: object providing ``board`` (2-D numpy array), ``reset()``,
            ``game_over()``, ``make_move(move, player)``, ``best_move()`` and
            ``get_reward()``.
    """
    game.reset()  # Ensure the game starts with an empty board
    n_rows, n_cols = game.board.shape

    while not game.game_over():
        print("Current board:")
        print(game.board)

        # Human move: keep asking until a legal, empty cell is entered.
        move = None
        while move is None:
            try:
                row, col = map(int, input("Enter your move (row, column): ").split(','))
            except ValueError:
                print("Invalid input. Please enter row and column separated by a comma.")
                continue

            if not (0 <= row < n_rows and 0 <= col < n_cols):
                print(
                    f"Invalid move. Row must be 0-{n_rows - 1} "
                    f"and column 0-{n_cols - 1}."
                )
                continue

            if game.board[row, col] == 0:
                move = (row, col)
            else:
                print("Invalid move. The cell is already occupied.")

        game.make_move(move, -1)  # Human plays -1
        if game.game_over():
            break

        # Agent move
        agent_move, _ = game.best_move()
        if agent_move is not None:
            game.make_move(agent_move, 1)  # Agent plays 1
            print(f"Agent moves: {agent_move}")
        else:
            print("No valid moves left for the agent.")

    print("Game over! Final board:")
    print(game.board)
    reward = game.get_reward()
    if reward == 100:
        print("Agent wins!")
    elif reward == -100:
        print("Human wins!")
    else:
        print("It's a draw!")


def agent_vs_agent(game):
    """Let the agent play both sides of one game, printing every move.

    Agent 1 plays ``1`` and agent 2 plays ``-1``. ``game.best_move()`` chooses
    for the ``+1`` side, so agent 2 is shown a sign-flipped copy of the board
    while it chooses. The real board is restored before its move is applied.

    Args:
        game: object providing ``board`` (numpy array), ``reset()``,
            ``game_over()``, ``make_move(move, player)``, ``best_move()`` and
            ``get_reward()``.
    """
    game.reset()  # Ensure the game starts with an empty board
    while not game.game_over():
        print("Current board:")
        print(game.board)

        # Agent 1 move
        agent1_move, _ = game.best_move()
        if agent1_move is not None:
            game.make_move(agent1_move, 1)  # Agent 1 plays 1
            print(f"Agent 1 moves: {agent1_move}")
        else:
            print("No valid moves left for Agent 1.")

        if game.game_over():
            break

        # Agent 2 move: choose on the negated board so its own pieces look
        # like +1, then put the real board back whatever happens.
        actual_board = game.board
        game.board = -actual_board
        try:
            agent2_move, _ = game.best_move()
        finally:
            game.board = actual_board

        if agent2_move is not None:
            game.make_move(agent2_move, -1)  # Agent 2 plays -1
            print(f"Agent 2 moves: {agent2_move}")
        else:
            print("No valid moves left for Agent 2.")

    print("Game over! Final board:")
    print(game.board)
    reward = game.get_reward()
    if reward == 100:
        print("Agent 1 wins!")
    elif reward == -100:
        print("Agent 2 wins!")
    else:
        print("It's a draw!")

def menu(game):
    """Show the mode menu, read one choice and start the matching game.

    ``'1'`` runs ``agent_vs_agent(game)``, ``'2'`` runs
    ``human_vs_agent(game)``; any other answer prints an error message and
    returns without playing.
    """
    for banner_line in (
        "Welcome to Tic Tac Toe!",
        "1: Agent vs Agent",
        "2: Agent vs Human",
    ):
        print(banner_line)

    choice = input("Choose an option: ")

    # Guard clause: bail out on anything that is not a known option.
    if choice not in ('1', '2'):
        print("Invalid option. Please choose 1 or 2.")
        return

    play = agent_vs_agent if choice == '1' else human_vs_agent
    play(game)


In [3]:
# Run configuration, kept in one place so the run can be repeated or tuned.
SEED = 0        # fixes the random opponent's moves during training
ALPHA = 0.01    # learning rate for the weight update
EPOCHS = 5500   # number of training games

# Training draws the opponent's moves from numpy's global random state,
# so seeding it makes a fresh "Restart & Run All" train the same agent.
np.random.seed(SEED)

game = TicTacToe()
game.train(alpha=ALPHA, epochs=EPOCHS)
menu(game)


Welcome to Tic Tac Toe!
1: Agent vs Agent
2: Agent vs Human
Current board:
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Agent 1 moves: [1 0]
Agent 2 moves: [0 0]
Current board:
[[-1.  0.  0.]
 [ 1.  0.  0.]
 [ 0.  0.  0.]]
Agent 1 moves: [2 0]
Agent 2 moves: [1 2]
Current board:
[[-1.  0.  0.]
 [ 1.  0. -1.]
 [ 1.  0.  0.]]
Agent 1 moves: [0 2]
Agent 2 moves: [1 1]
Current board:
[[-1.  0.  1.]
 [ 1. -1. -1.]
 [ 1.  0.  0.]]
Agent 1 moves: [2 2]
Agent 2 moves: [2 1]
Current board:
[[-1.  0.  1.]
 [ 1. -1. -1.]
 [ 1. -1.  1.]]
Agent 1 moves: [0 1]
Game over! Final board:
[[-1.  1.  1.]
 [ 1. -1. -1.]
 [ 1. -1.  1.]]
It's a draw!
