# In [None]:
# Import the libraries
import numpy as np
import random

# Define the constants
# Value of an unoccupied board cell; cells are compared against it to find
# free squares and to detect completed lines.
EMPTY = 0
# Player identifiers. NOTE: TicTacToe.make_move does not write these values
# into the board; it stores players.index(player) + 1, i.e. 1 for X, 2 for O.
X = 1
O = -1
# Rewards assigned by the training loop when a game ends (win / loss / draw).
WIN_REWARD = 1
LOSE_REWARD = -1
DRAW_REWARD = 0
# Q-learning hyperparameters used by Agent.update: the step size and the
# discount applied to the best Q-value of the following state.
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.9
# Probability that Agent.get_action plays a random move instead of the
# greedy one.
EXPLORATION_RATE = 0.1

# Define the Q-table
# Shared module-level table: maps a state string (the flattened board, see
# Agent.get_state) to a NumPy array of 9 Q-values, one per cell in row-major
# order. Entries are created lazily by Agent.
Q_table = {}

# Define the game class
class TicTacToe:
    """3x3 tic-tac-toe board with turn tracking and win/draw detection.

    Board cells hold ``EMPTY`` or ``players.index(player) + 1`` (1 for the
    first entry of ``players``, 2 for the second); they do NOT hold the
    player identifiers themselves.

    Attributes:
        board: 3x3 integer NumPy array of cell values.
        players: the two player identifiers, ``[X, O]``.
        current_player: identifier of the side to move. It is ``None`` after
            construction/reset and must be set by the caller before the
            first move.
        winner: identifier of the winning player, or ``None`` (no winner yet,
            or a draw once ``game_over`` is true).
        game_over: ``True`` once a line is completed or the board is full.
    """

    def __init__(self):
        # Initialize the players
        self.players = [X, O]
        # Everything else is per-game state; share the logic with reset()
        self.reset()

    def reset(self):
        """Clear the board and all per-game state (``players`` is kept)."""
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = None
        self.winner = None
        self.game_over = False

    def available_moves(self):
        """Return the empty cells as a list of ``(row, col)`` tuples, row-major."""
        return [
            (i, j)
            for i in range(3)
            for j in range(3)
            if self.board[i][j] == EMPTY
        ]

    def make_move(self, move):
        """Place the current player's mark at ``move`` and pass the turn.

        Args:
            move: ``(row, col)`` pair, each in 0..2.

        Returns:
            ``True`` if the mark was placed. ``False`` if the game is already
            over, the coordinates are off the board, or the cell is occupied;
            in that case nothing changes and the turn is not passed.

        Raises:
            ValueError: if ``current_player`` is not one of ``players``
                (for example still ``None`` after ``reset``).
        """
        # No further moves once a result has been recorded
        if self.game_over:
            return False
        row, col = move[0], move[1]
        # Reject off-board input explicitly: NumPy would raise IndexError for
        # large values and silently wrap negative ones onto another cell
        if not (0 <= row < 3 and 0 <= col < 3):
            return False
        # Check if the cell is free
        if self.board[row][col] != EMPTY:
            return False
        if self.current_player not in self.players:
            raise ValueError(
                "current_player must be set to one of `players` before moving"
            )
        # Make the move (cells store the player's position in `players` + 1)
        self.board[row][col] = self.players.index(self.current_player) + 1
        # Check the winner
        self.check_winner()
        # Switch the player (also after a game-ending move)
        self.switch_player()
        return True

    def switch_player(self):
        """Hand the turn to the other player."""
        if self.current_player == self.players[0]:
            self.current_player = self.players[1]
        else:
            self.current_player = self.players[0]

    def check_winner(self):
        """Update ``winner`` and ``game_over`` from the current board.

        A completed row, column or diagonal sets ``winner`` to the owner of
        that line. A full board without a completed line is a draw
        (``winner`` stays ``None``). Either outcome sets ``game_over``.
        """
        # Rows, then columns, then the two diagonals
        lines = [self.board[i, :] for i in range(3)]
        lines += [self.board[:, j] for j in range(3)]
        lines.append(self.board.diagonal())
        lines.append(np.fliplr(self.board).diagonal())
        for line in lines:
            first = line[0]
            if first != EMPTY and (line == first).all():
                # Map the stored cell value back to the player identifier
                self.winner = self.players[int(first) - 1]
                self.game_over = True
        # Check draw
        if not self.game_over and not self.available_moves():
            self.winner = None
            self.game_over = True

    def print_board(self):
        """Print the board, showing marks as ``X`` / ``O`` and blanks as spaces."""
        # Show letters rather than the numeric identifiers (1 / -1), which
        # are hard to read and break the column alignment
        symbols = {X: "X", O: "O"}
        print("-------------")
        for i in range(3):
            print("|", end="")
            for j in range(3):
                cell = self.board[i][j]
                if cell != EMPTY:
                    mark = symbols[self.players[int(cell) - 1]]
                else:
                    mark = " "
                print(mark, end="|")
            print()
        print("-------------")

# Define the agent class
class Agent:
    """Tabular Q-learning player backed by the module-level ``Q_table``.

    States are board strings (see ``get_state``); each state maps to 9
    Q-values, one per cell in row-major order.
    """

    def __init__(self, symbol):
        """Store the player identifier this agent plays as."""
        self.symbol = symbol

    def get_state(self, game):
        """Return ``game.board`` flattened row-major and joined into a string."""
        return "".join(map(str, game.board.flatten()))

    def get_action(self, game):
        """Choose a legal move with an epsilon-greedy policy.

        With probability ``EXPLORATION_RATE`` a random empty cell is chosen.
        Otherwise the empty cell with the highest Q-value is chosen, with
        ties going to the first cell in row-major order. Occupied cells are
        never returned.

        Args:
            game: object exposing ``board`` and ``available_moves()``.

        Returns:
            A ``(row, col)`` tuple taken from ``game.available_moves()``.

        Raises:
            ValueError: if there is no empty cell left.
        """
        state = self.get_state(game)
        moves = game.available_moves()
        if not moves:
            raise ValueError("no available moves: the board is full")
        if random.random() < EXPLORATION_RATE:
            # Explore a random action
            return random.choice(moves)
        # Exploit the best action
        if state not in Q_table:
            # Initialize the Q-values for the state
            Q_table[state] = np.zeros(9)
        # Only rank the legal cells: an argmax over all 9 entries can land on
        # an occupied cell (for an all-zero row it always picks cell 0)
        flat = np.ravel_multi_index(tuple(zip(*moves)), (3, 3))
        best = int(np.argmax(Q_table[state][flat]))
        return moves[best]

    def update(self, game, old_state, action, reward, new_state):
        """Apply one Q-learning update to ``Q_table[old_state][action]``.

        Q(s, a) <- Q(s, a) + LEARNING_RATE
                   * (reward + DISCOUNT_FACTOR * max_a' Q(s', a') - Q(s, a))

        Args:
            game: game whose board is used to decide which cells of
                ``new_state`` are legal; only consulted when its current
                state equals ``new_state``. May be ``None``.
            old_state: state string the action was taken in.
            action: ``(row, col)`` of the move that was played.
            reward: reward observed for the transition.
            new_state: state string reached after the transition.
        """
        # Initialize the Q-values for unseen states
        for state in (old_state, new_state):
            if state not in Q_table:
                Q_table[state] = np.zeros(9)
        # Convert the 2D action into a 1D index
        action_index = np.ravel_multi_index(action, (3, 3))

        # Use the 1D index to index into the Q-table
        old_Q = Q_table[old_state][action_index]
        max_Q = self._best_future_value(game, new_state)
        target = reward + DISCOUNT_FACTOR * max_Q
        Q_table[old_state][action_index] = old_Q + LEARNING_RATE * (target - old_Q)

    def _best_future_value(self, game, new_state):
        """Return the best Q-value obtainable from ``new_state``."""
        q_values = Q_table[new_state]
        if game is None or self.get_state(game) != new_state:
            # No board describing new_state: fall back to the plain maximum
            return np.max(q_values)
        if game.game_over:
            # Nothing can be earned after a terminal state
            return 0.0
        # Occupied cells are never played, so their entries stay at 0.0 and
        # would hide negative values of the legal moves in a plain maximum
        legal = game.board.flatten() == EMPTY
        return np.max(q_values[legal]) if legal.any() else 0.0

# Create the game instance
game = TicTacToe()

# Create the agent instance
agent = Agent(X)

# Train the agent by playing 10000 games against a random opponent.
#
# The agent's move is not scored right after it is played: its outcome is
# only known once the opponent has replied. The last (state, action) pair is
# therefore kept pending and updated either
#   * when it is the agent's turn again (reward 0, bootstrapping from the
#     state the agent now has to decide in), or
#   * when the game ends (win / draw / loss reward), whoever made the final
#     move. Updating only right after the agent's own move would never
#     deliver LOSE_REWARD, because losses happen on the opponent's move.
for i in range(10000):
    # Reset the game
    game.reset()
    # Choose a random player to start
    game.current_player = random.choice(game.players)
    # Agent move still waiting for its outcome
    last_state = None
    last_action = None
    # Play until the game is over
    while not game.game_over:
        if game.current_player != agent.symbol:
            # Opponent's turn: uniformly random legal move
            game.make_move(random.choice(game.available_moves()))
            continue
        # Agent's turn
        state = agent.get_state(game)
        if last_state is not None:
            # The opponent replied and the game goes on: no reward yet
            agent.update(game, last_state, last_action, 0, state)
            last_state = None
            last_action = None
        action = agent.get_action(game)
        # make_move returns False for an occupied cell and leaves the board
        # untouched; in that case the agent is simply asked again
        if game.make_move(action):
            last_state = state
            last_action = action
    # Game is over: settle the agent's last move with the final reward
    if last_state is not None:
        if game.winner == agent.symbol:
            # Agent wins
            reward = WIN_REWARD
        elif game.winner is None:
            # Draw
            reward = DRAW_REWARD
        else:
            # Opponent wins
            reward = LOSE_REWARD
        agent.update(game, last_state, last_action, reward, agent.get_state(game))

# In [None]:
# Test the agent by playing 10 games against a human at the console
for i in range(10):
    # Reset the game
    game.reset()
    # Choose a random player to start
    game.current_player = random.choice(game.players)
    # Print the board
    game.print_board()
    # Play until the game is over
    while not game.game_over:
        # Get the current player
        player = game.current_player
        # Keep asking until a valid move has been played
        valid = False
        while not valid:
            if player == agent.symbol:
                # Agent's turn
                action = agent.get_action(game)
            else:
                # Human's turn
                print("Your turn. Enter row and column (0-2) separated by space:")
                try:
                    row, col = map(int, input().split())
                except ValueError:
                    # Not exactly two integers
                    action = None
                else:
                    # Off-board coordinates are rejected here rather than
                    # being passed on to index the board
                    in_range = 0 <= row < 3 and 0 <= col < 3
                    action = (row, col) if in_range else None
            # Check if the move is valid
            valid = action is not None and game.make_move(action)
            if not valid:
                print("Invalid move. Try again.")
        # Print the board
        game.print_board()
        # Check the winner
        if game.game_over:
            # Game is over
            if game.winner == agent.symbol:
                # Agent wins
                print("Agent wins!")
            elif game.winner is None:
                # Draw
                print("Draw!")
            else:
                # Human wins
                print("You win!")
        # make_move has already passed the turn to the other player;
        # switching again here would let the same side move every time