In [91]:
import random

class TicTacToe:
    def __init__(self):
        self.board = [0] * 9  # 0 for empty, 1 for X, -1 for O
        self.game_over = False
        self.current_player = 1  # X starts first

    def is_valid_move(self, position):
        return self.board[position] == 0

    def make_move(self, position):
        if not self.is_valid_move(position):
            return False
        self.board[position] = self.current_player
        if self.check_winner():
            self.game_over = True
        return True

    def change_turn(self):
        self.current_player *= -1

    def check_winner(self):
        # Horizontal, vertical, and diagonal checks
        wins = [(0, 1, 2), (3, 4, 5), (6, 7, 8),
                (0, 3, 6), (1, 4, 7), (2, 5, 8),
                (0, 4, 8), (2, 4, 6)]
        for a, b, c in wins:
            if self.board[a] == self.board[b] == self.board[c] != 0:
                return True
        return False

    def get_empty_positions(self):
        return [i for i, x in enumerate(self.board) if x == 0]

    def reset(self):
        self.board = [0] * 9
        self.game_over = False
        self.current_player = 1


class QLearningAgent:
    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.2):
        # Separate Q-tables for X and O
        self.q_tables = {1: {}, -1: {}}  # 1 for X, -1 for O
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon

    def get_q_value(self, player, state, action):
        return self.q_tables[player].get((tuple(state), action), 0)

    def choose_action(self, player, state, available_actions):
        if not available_actions:
            return None
        if random.random() < self.epsilon:
            return random.choice(available_actions)
        qs = [self.get_q_value(player, state, a) for a in available_actions]
        max_q = max(qs)
        return available_actions[qs.index(max_q)]


    def update_q_value(self, player, state, action, reward, next_state, next_available_actions, opponent):
        current_q = self.get_q_value(player, state, action)
        best_opponent_action = max([self.get_q_value(opponent, next_state, a) for a in next_available_actions], default=0)
        self.q_tables[player][(tuple(state), action)] = current_q + self.alpha * (reward - self.gamma * best_opponent_action - current_q)


def train_agent(agent, episodes=1000):
    game = TicTacToe()
    for _ in tqdm(range(episodes)):
        game.reset()
        while not game.game_over:
            state = game.board.copy()
            current_player = game.current_player
            available_actions = game.get_empty_positions()
            action = agent.choose_action(current_player, state, available_actions)
            if action is not None:
                game.make_move(action)
                next_state = game.board.copy()
                next_available_actions = game.get_empty_positions()

                if game.game_over:
                    if game.check_winner():
                        reward = 1 if game.current_player == current_player else -1
                    else:
                        reward = -0.25  # It's a tie
                else:
                    reward = 0

                opponent = -current_player
                agent.update_q_value(current_player, state, action, reward, next_state, next_available_actions, opponent)
                game.change_turn()
            if game.game_over or action is None:
                break


# Adjust make_move and check_winner accordingly if needed.
agent = QLearningAgent(alpha=0.05, gamma=0.8, epsilon=0.1)
train_agent(agent, episodes=100_000)

  0%|          | 0/100000 [00:00<?, ?it/s]

100%|██████████| 100000/100000 [00:07<00:00, 12651.00it/s]


In [94]:
import random

def play_against_random(agent, games=100):
    wins = 0
    losses = 0
    ties = 0
    agent.epsilon = 0  # No exploration
    for _ in range(games):
        game = TicTacToe()
        random_player = random.choice([-1, 1])  # Randomly decide who starts first

        while not game.game_over:
            available_actions = game.get_empty_positions()
            if not available_actions:  # No available actions, board is full
                ties += 1
                break

            if game.current_player == random_player:
                # Random player's turn
                action = random.choice(available_actions)
                game.make_move(action)
            else:
                # Agent's turn
                action = agent.choose_action(game.current_player, game.board, available_actions)
                if action is not None:
                    game.make_move(action)
            
            # Check game status
            if game.check_winner():
                if game.current_player != random_player:
                    wins += 1
                elif game.current_player == random_player:
                    losses += 1
                game.game_over = True

            game.change_turn()

    print(f"Out of {games} games:")
    print(f"Wins: {wins}")
    print(f"Losses: {losses}")
    print(f"Ties: {ties}")

# Example of using this function
play_against_random(agent, games=100)


Out of 100 games:
Wins: 84
Losses: 4
Ties: 12
