In [9]:
import random
import numpy as np

# Global Q-table and hyperparameters.
# Q maps a state string (as produced by get_state) to a dict of
# {cell index: estimated value}. Entries are created lazily by update_q,
# so states and actions that were never updated are simply absent.
Q = {}
ALPHA = 0.5  # Learning rate: how far each update moves toward the new target
GAMMA = 0.9  # Discount factor applied to the best next-state value
EMPTY = ' '  # Marker stored in a board cell that no player has taken

# --- Core Logic Functions ---

def get_state(board):
    """Serialize the board into one string so it can be used as a dict key.

    Args:
        board: Iterable of single-cell strings, in cell order.

    Returns:
        The cells concatenated in order, with no separator.
    """
    separator = ""
    state_key = separator.join(cell for cell in board)
    return state_key

def get_actions(board):
    """List the legal moves, i.e. the positions of cells still equal to EMPTY.

    Args:
        board: Sequence of cell markers.

    Returns:
        A list of indices in ascending order; empty when no cell is free.
    """
    open_cells = []
    for index, cell in enumerate(board):
        if cell == EMPTY:
            open_cells.append(index)
    return open_cells

def check_win(board, player):
    """Report whether `player` occupies a complete row, column, or diagonal.

    Args:
        board: Indexable sequence of 9 cell markers.
        player: The marker to look for (e.g. 'X' or 'O').

    Returns:
        True if any of the eight winning lines is filled with `player`,
        otherwise False.
    """
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )
    for first, second, third in winning_lines:
        if (board[first] == player
                and board[second] == player
                and board[third] == player):
            return True
    return False

def choose_action(board, epsilon=0.1):
    """Pick a move for 'X' with an epsilon-greedy policy over the Q-table.

    Args:
        board: Current board; must contain at least one EMPTY cell.
        epsilon: Probability of picking a uniformly random legal move.

    Returns:
        The index of the chosen cell.
    """
    state = get_state(board)
    actions = get_actions(board)

    # Draw the exploration roll first, then fall back to a random move as
    # well when nothing has been learned for this state yet.
    explore = random.random() < epsilon
    if explore or state not in Q:
        return random.choice(actions)

    # Greedy choice: moves with no recorded value count as 0, and ties go
    # to the lowest cell index because max() keeps the first maximum.
    known_values = Q[state]

    def value_of(candidate):
        return known_values.get(candidate, 0)

    return max(actions, key=value_of)

def update_q(state, action, reward, next_state):
    """Apply one Q-learning update to Q[state][action].

    Implements
        Q(s, a) <- Q(s, a) + ALPHA * (reward + GAMMA * max_a' Q(s', a') - Q(s, a))

    The max is taken over every legal move in `next_state` (each cell equal
    to EMPTY), and a move with no recorded value counts as 0. This matches
    how choose_action ranks moves. Taking the max over only the recorded
    values would make a state look as bad as its worst explored move even
    though the greedy policy would pick an unexplored move valued at 0.

    Args:
        state: State string (see get_state) in which `action` was taken.
        action: Index of the cell that was played.
        reward: Immediate reward observed for the move.
        next_state: State string reached afterwards. For terminal boards no
            values are ever recorded, so the bootstrap term is 0.
    """
    action_values = Q.setdefault(state, {})
    old_q = action_values.get(action, 0)

    next_values = Q.get(next_state, {})
    # default=0 covers a full board, where there are no legal moves left.
    next_max = max(
        (next_values.get(move, 0)
         for move, cell in enumerate(next_state) if cell == EMPTY),
        default=0,
    )

    action_values[action] = old_q + ALPHA * (reward + GAMMA * next_max - old_q)


# --- Training ---

def train_ai(episodes=5000, epsilon=0.1):
    """Train the AI (player 'X') by self-play against a random 'O' opponent.

    Each episode starts from an empty board with 'X' to move. After every
    'X' move the Q-table is updated once, using the board as it stands when
    'X' is next to move (or the terminal board if the game ended).

    Rewards: +1 for an 'X' win, 0.5 for a draw, -1 for an 'O' win, and 0 for
    a move after which the game continues.

    Args:
        episodes: Number of games to play.
        epsilon: Exploration rate passed to choose_action.
    """
    for _ in range(episodes):
        board = [EMPTY] * 9
        while True:
            # AI (X) move
            state = get_state(board)
            action = choose_action(board, epsilon=epsilon)
            board[action] = 'X'

            if check_win(board, 'X'):
                update_q(state, action, 1, get_state(board))  # Win
                break

            # A full board with no winner is a draw; this also guarantees
            # the opponent below always has at least one legal move.
            opp_actions = get_actions(board)
            if not opp_actions:
                update_q(state, action, 0.5, get_state(board))  # Draw
                break

            # Opponent (O) move, chosen uniformly at random
            board[random.choice(opp_actions)] = 'O'
            next_state = get_state(board)

            if check_win(board, 'O'):
                update_q(state, action, -1, next_state)  # Loss
                break

            # Neutral reward for a move that did not end the game
            update_q(state, action, 0, next_state)


# --- Game Display (Kept simple) ---

def display_board(board):
    """Print the board as three ' | '-separated rows plus a dashed rule.

    Args:
        board: Sequence of 9 single-cell strings, row by row.
    """
    rows = []
    for row_start in (0, 3, 6):
        row_cells = board[row_start:row_start + 3]
        rows.append(" | ".join(row_cells))
    # Build every row before printing so nothing is emitted if a cell is bad.
    print(*rows, sep="\n")
    print("---------")


# --- Game Play ---

def play_game():
    """Run an interactive game on the console: human 'O' versus AI 'X'.

    The AI moves first and always plays greedily (epsilon=0); for states it
    has no Q-values for, choose_action falls back to a random legal move.
    The human is prompted for a cell index until a free cell in 0-8 is given.

    Raises:
        EOFError: If standard input is closed while waiting for a move.
        KeyboardInterrupt: If the user interrupts the prompt.
    """
    board = [EMPTY] * 9

    while True:
        # AI ('X') turn (exploitation only)
        ai_action = choose_action(board, epsilon=0)
        board[ai_action] = 'X'
        print("\nAI moved:")
        display_board(board)

        if check_win(board, 'X'):
            print("AI wins!")
            break
        # 'X' moves first, so only an 'X' move can fill the ninth cell;
        # a draw therefore never needs checking after the human's move.
        if not get_actions(board):
            print("It's a draw!")
            break

        # Human ('O') turn
        while True:
            try:
                move = int(input("\nEnter your move (0-8): "))
            except ValueError:
                # Only a non-integer entry is retried. EOF and Ctrl-C
                # propagate so the prompt loop can always be exited.
                print("Invalid input, try again.")
                continue
            if 0 <= move <= 8 and board[move] == EMPTY:
                board[move] = 'O'
                break
            print("Invalid move, try again.")

        if check_win(board, 'O'):
            display_board(board)
            print("You win!")
            break


# --- Main Execution ---
if __name__ == '__main__':
    # Train first so the greedy player has Q-values to draw on.
    print("Training AI (X)...")
    train_ai()
    print("Training complete!")

    # Show the cell numbering the human uses when entering a move.
    cell_indices = np.arange(9).reshape(3, 3)
    print("\nGame starts! You are 'O', AI is 'X'")
    print("Positions (0-8):\n", cell_indices)

    # Start the interactive game; comment this call out to train only.
    play_game()

Training AI (X)...
Training complete!

Game starts! You are 'O', AI is 'X'
Positions (0-8):
 [[0 1 2]
 [3 4 5]
 [6 7 8]]

AI moved:
X |   |  
  |   |  
  |   |  
---------

Enter your move (0-8): 8

AI moved:
X |   | X
  |   |  
  |   | O
---------

Enter your move (0-8): 1

AI moved:
X | O | X
X |   |  
  |   | O
---------

Enter your move (0-8): 6

AI moved:
X | O | X
X |   |  
O | X | O
---------

Enter your move (0-8): 4

AI moved:
X | O | X
X | O | X
O | X | O
---------
It's a draw!
