<a href="https://colab.research.google.com/github/debi201326/AAI_Practical/blob/main/AAI_prac_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

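Practical 6: Choose a specific application domain (robotics or game playing) and design a reinforcement learning system for it.
Chosen domain: game playing (Tic-Tac-Toe), with a tabular Q-learning agent playing 'O' against a human playing 'X'.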


In [1]:
import numpy as np
import random
from IPython.display import clear_output

In [2]:
class TicTacToe:
    """Tic-Tac-Toe game state kept as a flat list of nine cells.

    Cells are indexed 0-8 in row-major order and hold 'X', 'O' or ' '
    (empty). 'X' is the side to move after construction or reset().
    """

    # Index triples forming a line, scanned in this order:
    # rows, then columns, then the two diagonals.
    _LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        # A new game is simply a reset one; the returned state is not needed here.
        self.reset()

    def reset(self):
        """Clear the board, give the move to 'X' and return the fresh state."""
        self.board = [' ' for _ in range(9)]
        self.current_player = 'X'  # Human is X, AI is O
        self.game_over = False
        self.winner = None
        return self.get_state()

    def get_state(self):
        """Return the board as a tuple so it can serve as a dictionary key."""
        return tuple(self.board)

    def available_moves(self):
        """Return the indices of all empty cells, in ascending order."""
        return [idx for idx in range(len(self.board)) if self.board[idx] == ' ']

    def make_move(self, position):
        """Place the current player's mark at `position`.

        Returns True when the mark was placed, False when the cell is taken
        or the game has already ended. The turn passes to the other player
        only if the move did not end the game.
        """
        if self.board[position] != ' ' or self.game_over:
            return False

        mover = self.current_player
        self.board[position] = mover
        self.check_game_over()
        if not self.game_over:
            # 'X' hands over to 'O'; anything else hands over to 'X'.
            self.current_player = {'X': 'O'}.get(mover, 'X')
        return True

    def check_game_over(self):
        """Set game_over/winner if a line is complete or the board is full."""
        for first, second, third in self._LINES:
            mark = self.board[first]
            if mark != ' ' and mark == self.board[second] == self.board[third]:
                self.game_over = True
                self.winner = mark
                return

        # No completed line: a full board means a tie.
        if ' ' not in self.board:
            self.game_over = True
            self.winner = None

    def print_board(self):
        """Print the board as a 3x3 grid with horizontal separators."""
        separator = "-------------"
        print(separator)
        for row_start in (0, 3, 6):
            left, middle, right = self.board[row_start:row_start + 3]
            print(f"| {left} | {middle} | {right} |")
            print(separator)

    def play_human_move(self):
        """Prompt on stdin until a free cell index is entered, then play it."""
        open_cells = self.available_moves()
        print("Available positions:", open_cells)
        while True:
            try:
                choice = int(input("Enter your move (0-8): "))
            except ValueError:
                print("Please enter a number between 0-8.")
                continue
            if choice not in open_cells:
                print("Invalid move. Try again.")
                continue
            self.make_move(choice)
            return


In [3]:
class QLearningAgent:
    """Tabular Q-learning agent for a nine-cell board.

    Q-values live in `q_table`, a dict mapping a state (a tuple of nine
    cells, as produced by TicTacToe.get_state) to a length-9 numpy array
    with one entry per board index. Rows are created lazily, filled with
    zeros, the first time a state is looked up.
    """

    def __init__(self):
        self.q_table = {}
        self.learning_rate = 0.1
        self.discount_factor = 0.9
        self.epsilon = 0.3  # Exploration rate
        self.epsilon_decay = 0.995
        self.min_epsilon = 0.01

    def _q_row(self, state):
        """Return the Q-value row for `state`, creating a zeroed row on first use."""
        if state not in self.q_table:
            self.q_table[state] = np.zeros(9)
        return self.q_table[state]

    def get_q_value(self, state, action):
        """Return Q(state, action); unseen states are added to the table as zeros."""
        return self._q_row(state)[action]

    def choose_action(self, state, available_actions):
        """Pick an action epsilon-greedily from `available_actions`.

        With probability `epsilon` a uniformly random available action is
        returned; otherwise one of the available actions with the highest
        Q-value, with ties broken uniformly at random.
        """
        if random.random() < self.epsilon:
            # Exploration: random action
            return random.choice(available_actions)

        # Exploitation: best known action
        q_values = [self.get_q_value(state, a) for a in available_actions]
        max_q = max(q_values)
        # If multiple actions have the same max Q value, choose randomly among them
        best_actions = [a for a, q in zip(available_actions, q_values) if q == max_q]
        return random.choice(best_actions)

    def learn(self, state, action, reward, next_state, done):
        """Apply one Q-learning update for (state, action, reward, next_state).

        When `done` is true the bootstrap term is 0. Otherwise it is the best
        Q-value among the moves that are actually playable in `next_state`
        (cells equal to ' '). Occupied cells are never updated and stay at 0,
        so including them would stop the bootstrap value from ever going
        negative and hide positions in which every playable move is bad.

        Epsilon is decayed on every call, i.e. once per update step, and is
        floored at `min_epsilon`.
        """
        row = self._q_row(state)
        current_q = row[action]

        if done:
            max_next_q = 0
        else:
            next_row = self._q_row(next_state)
            playable = [i for i, cell in enumerate(next_state) if cell == ' ']
            if playable:
                max_next_q = max(next_row[i] for i in playable)
            else:
                # No empty cell could be identified: keep the previous
                # behaviour of taking the max over the whole row.
                max_next_q = max(next_row)

        # Q-learning update rule
        new_q = current_q + self.learning_rate * (reward + self.discount_factor * max_next_q - current_q)
        row[action] = new_q

        # Decay epsilon
        self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)


In [4]:
def train_agent(episodes=10000):
    """Train a Q-learning agent to play 'O' against a uniformly random 'X'.

    Each episode mirrors the interactive game: 'X' (the simulated human)
    opens, then the agent and the opponent alternate. One learning step
    covers the agent's move plus the opponent's reply, so both `state` and
    `next_state` are always positions in which the agent is to move (or the
    final position).

    Rewards are from the agent's ('O') point of view: +1 for a win, -1 for a
    loss, 0.5 for a tie and 0 for a move that does not end the game.

    Args:
        episodes: number of self-contained training games to play.

    Returns:
        The trained QLearningAgent.
    """
    env = TicTacToe()
    agent = QLearningAgent()

    for episode in range(episodes):
        env.reset()

        # Human's opening move (simulated, random). After reset() the side
        # to move is 'X', so this puts the agent on the 'O' side for the
        # rest of the episode.
        env.make_move(random.choice(env.available_moves()))
        state = env.get_state()

        while not env.game_over:
            # AI's turn (O)
            action = agent.choose_action(state, env.available_moves())
            env.make_move(action)

            # Human's turn (simulated random move), unless the AI's move
            # already finished the game.
            if not env.game_over:
                env.make_move(random.choice(env.available_moves()))

            # Score the outcome of this round from O's perspective.
            if not env.game_over:
                reward = 0
            elif env.winner == 'O':
                reward = 1  # Win
            elif env.winner == 'X':
                reward = -1  # Loss
            else:
                reward = 0.5  # Tie

            # Learn from the experience. next_state is taken after the
            # opponent's reply so it is the position the agent will
            # actually have to act in next.
            next_state = env.get_state()
            agent.learn(state, action, reward, next_state, env.game_over)
            state = next_state

        if (episode + 1) % 1000 == 0:
            print(f"Episode {episode + 1}/{episodes}, Epsilon: {agent.epsilon:.3f}")

    return agent


In [5]:
def play_game(agent):
    """Play one interactive game on stdin/stdout: human is 'X', `agent` is 'O'.

    The board is printed after every move and the result is announced once
    the game ends. The agent plays greedily for the duration of the game:
    its `epsilon` is set to 0 so it never makes a random exploratory move
    against the human, and the previous value is restored afterwards, even
    if the game is interrupted.

    Args:
        agent: object exposing `epsilon` and
            `choose_action(state, available_actions)`.
    """
    env = TicTacToe()
    env.reset()
    env.print_board()

    trained_epsilon = agent.epsilon
    agent.epsilon = 0.0  # exploitation only while playing for real
    try:
        while not env.game_over:
            if env.current_player == 'X':  # Human's turn
                env.play_human_move()
                print("\nHuman's move:")
            else:  # AI's turn
                available_actions = env.available_moves()
                action = agent.choose_action(env.get_state(), available_actions)
                env.make_move(action)
                print("\nAI's move:")
            env.print_board()
    finally:
        agent.epsilon = trained_epsilon

    if env.winner == 'X':
        print("You win!")
    elif env.winner == 'O':
        print("AI wins!")
    else:
        print("It's a tie!")


In [6]:
# Train the agent once up front; every game below reuses it.
print("Training AI agent...")
ai_agent = train_agent(episodes=10000)

# Keep starting new games against the AI until the answer is anything
# other than "y" (case-insensitive).
print("\nTraining complete! Let's play!")
keep_playing = True
while keep_playing:
    play_game(ai_agent)
    again = input("Play again? (y/n): ").lower()
    keep_playing = (again == 'y')

Training AI agent...
Episode 1000/10000, Epsilon: 0.010
Episode 2000/10000, Epsilon: 0.010
Episode 3000/10000, Epsilon: 0.010
Episode 4000/10000, Epsilon: 0.010
Episode 5000/10000, Epsilon: 0.010
Episode 6000/10000, Epsilon: 0.010
Episode 7000/10000, Epsilon: 0.010
Episode 8000/10000, Epsilon: 0.010
Episode 9000/10000, Epsilon: 0.010
Episode 10000/10000, Epsilon: 0.010

Training complete! Let's play!
-------------
|   |   |   |
-------------
|   |   |   |
-------------
|   |   |   |
-------------
Available positions: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Enter your move (0-8): 0

Human's move:
-------------
| X |   |   |
-------------
|   |   |   |
-------------
|   |   |   |
-------------

AI's move:
-------------
| X |   |   |
-------------
|   |   | O |
-------------
|   |   |   |
-------------
Available positions: [1, 2, 3, 4, 6, 7, 8]
Enter your move (0-8): 4

Human's move:
-------------
| X |   |   |
-------------
|   | X | O |
-------------
|   |   |   |
-------------

AI's move:
--------