# Smarter Agent

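In this unit we will create a smarter agent that solves the maze using search: first depth-first search (DFS), then a best-first search (BFS) that looks for the highest-reward path.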

In [None]:
import copy
from io import BytesIO
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.animation import FuncAnimation
from matplotlib import rc

# Ensure animations can be displayed inline
rc('animation', html='jshtml')

class MazeGame:
    """
    A small grid maze in which a player walks from a start cell to a goal cell.

    Attributes:
      board: Grid of display strings (emoji or ' '); mutated as the player moves.
      board_values: Grid of numeric rewards laid out like `board`.
      player_pos: Current (row, column) of the player.
      goal_pos: (row, column) of the goal cell.
      board_history: Deep copies of `board`, one per state, used as animation frames.
    """

    # (row, column) offset applied for each supported direction
    _DELTAS = {
        'up': (-1, 0),
        'down': (1, 0),
        'left': (0, -1),
        'right': (0, 1),
    }

    def __init__(self):
        self.board = [
            ['😊', '😺', ' '],
            [' ', ' ', ' '],
            ['😺', ' ', '😍']
        ]
        self.board_values = [
            [-1,  20, -1],
            [-1, -1, -1],
            [ 20, -1, 100]
        ]
        self.player_pos = (0, 0)  # Starting position
        self.goal_pos = (2, 2)  # Goal position
        self.board_history = []  # To store the board state at each step
        self.board_history.append(copy.deepcopy(self.board))  # Store initial state

    def render_animation(self):
        """
        Creates an animation that shows the progression of the game.

        The grid size is taken from the current board, so the drawing adapts
        if the board layout is changed.

        Returns:
          A matplotlib FuncAnimation with one frame per entry in `board_history`.
        """
        n_rows = len(self.board)
        n_cols = len(self.board[0]) if n_rows else 0

        fig, ax = plt.subplots()
        ax.set_xticks(np.arange(0, n_cols + 1, 1))
        ax.set_yticks(np.arange(0, n_rows + 1, 1))
        ax.grid(True, color='black')

        # Set limits and reverse y-axis to have (0,0) in top-left
        ax.set_xlim(0, n_cols)
        ax.set_ylim(0, n_rows)
        ax.invert_yaxis()

        # One (initially empty) text object per cell, centred in the cell
        text_objects = [
            [
                ax.text(j + 0.5, i + 0.5, '', ha='center', va='center', fontsize=50)
                for j in range(n_cols)
            ]
            for i in range(n_rows)
        ]

        # Function to update the board for each frame of the animation
        def update(frame):
            board = self.board_history[frame]
            for i in range(n_rows):
                for j in range(n_cols):
                    text_objects[i][j].set_text(board[i][j])
            # With blit=True the callback must return the artists it changed
            return [item for sublist in text_objects for item in sublist]

        # Create the animation
        ani = FuncAnimation(fig, update, frames=len(self.board_history), interval=500, blit=True)
        plt.close(fig)  # Avoid also showing the static figure
        return ani

    def move(self, direction):
        """
        Moves the player one cell in the specified direction.

        An unknown direction, or a move that would leave the board, prints
        "Invalid move!" and leaves the game state (and history) unchanged.

        Args:
          direction: A string indicating the direction to move ('up', 'down', 'left', 'right').
        """
        delta = self._DELTAS.get(direction)
        if delta is None:
            print("Invalid move!")
            return

        x, y = self.player_pos
        new_x, new_y = x + delta[0], y + delta[1]

        # Bounds come from the board itself rather than a hard-coded size
        if not (0 <= new_x < len(self.board) and 0 <= new_y < len(self.board[new_x])):
            print("Invalid move!")
            return

        # Clear the old cell and draw the player on the new one
        self.board[x][y] = ' '
        self.board[new_x][new_y] = '😊'
        self.player_pos = (new_x, new_y)

        # Check if player reached the goal
        if self.player_pos == self.goal_pos:
            self.board[new_x][new_y] = '😊😍'
            # The history holds the initial state plus one entry per earlier
            # successful move, so its length here equals the moves made so far.
            print(f"You reached the goal in {len(self.board_history)} moves!")

        # Append the updated board state to the history
        self.board_history.append(copy.deepcopy(self.board))



In [None]:
# Create a game instance
game = MazeGame()

# Example gameplay: replay a fixed sequence of moves, one cell per step
for step in ('down', 'right', 'down', 'right'):
    game.move(step)

You reached the goal in 4 moves!


In [None]:
# Build an animation from the board states recorded during the moves above;
# as the last expression of the cell, the notebook displays it.
game.render_animation()

In [None]:
# prompt: Create a class for an Agent that can play the maze

import random

class Agent:
    """Maze player that picks each move at random, with no knowledge of the board."""

    def __init__(self):
        """Create the agent; it keeps no state."""

    def choose_action(self, possible_actions):
        """
        Pick one entry of `possible_actions` at random.

        Args:
          possible_actions: A list of strings naming the available actions
            ('up', 'down', 'left', 'right').

        Returns:
          The selected action string.
        """
        chosen = random.choice(possible_actions)
        return chosen

In [None]:
# prompt: Use the agent class to play ten moves
# NOTE: the loop below makes 20 attempts, not ten. Attempts that would leave
# the board are rejected by game.move(), which prints "Invalid move!".
game = MazeGame()
agent = Agent()

# The set of candidate actions never changes, so build it once
possible_actions = ['up', 'down', 'left', 'right']
for _ in range(20):
    action = agent.choose_action(possible_actions)
    game.move(action)

Invalid move!
Invalid move!
Invalid move!
Invalid move!
Invalid move!
Invalid move!
Invalid move!
Invalid move!
Invalid move!
Invalid move!
You reached the goal in 10 moves!


In [None]:
# Animate the board states recorded while the random agent was playing
game.render_animation()

In [None]:
# prompt: Create a class for an Agent that can play the maze. Check that it doesn't make invalid moves and that it stops when it reaches the goal

class Agent:
    """Random maze player that only picks moves which keep the player on the board."""

    def __init__(self):
        pass

    def choose_action(self, possible_actions, game):
        """
        Chooses an action randomly among the possible actions that stay on the board.

        Board bounds are read from `game.board`, so the check follows the
        actual board size instead of assuming a 3x3 grid.

        Args:
          possible_actions: A list of strings representing possible actions ('up', 'down', 'left', 'right').
          game: The game being played; its `player_pos` (row, column) and
            `board` (grid of cells) are read.

        Returns:
          A string representing the chosen action, or None when none of the
          possible actions is valid from the current position.
        """
        x, y = game.player_pos
        last_row = len(game.board) - 1
        last_col = len(game.board[x]) - 1

        # Whether each known direction keeps the player inside the board
        stays_on_board = {
            'up': x > 0,
            'down': x < last_row,
            'left': y > 0,
            'right': y < last_col,
        }

        # Unknown action names are treated as invalid
        valid_actions = [
            action for action in possible_actions
            if stays_on_board.get(action, False)
        ]
        if not valid_actions:
            return None
        return random.choice(valid_actions)

In [None]:
game = MazeGame()
agent = Agent()

# Keep requesting moves until the player stands on the goal cell
possible_actions = ['up', 'down', 'left', 'right']
while game.player_pos != game.goal_pos:
    action = agent.choose_action(possible_actions, game)
    if action:
        game.move(action)

In [None]:
# Animate the walk the agent took until it reached the goal
game.render_animation()

In [None]:
class Agent:
    """Maze solver that finds a path from the player to the goal with an iterative DFS."""

    def __init__(self, maze_game):
        """
        Initialize the agent with the MazeGame instance.

        Args:
          maze_game: The game to solve; its `board`, `player_pos` and
            `goal_pos` are read, and `move()` is called by `play()`.
        """
        self.game = maze_game
        self.visited = set()  # Set to keep track of visited positions
        self.stack = []  # Stack to implement the DFS search
        self.solution_path = []  # Stores the successful path to the goal

    def valid_move(self, position):
        """
        Check if the position lies inside the maze.

        Only the bounds are checked here; whether the cell was already
        visited is tested separately by `dfs()`.

        Args:
          position: A (row, column) tuple.

        Returns:
          True if the position is on the board, False otherwise.
        """
        x, y = position
        board = self.game.board
        # Bounds are taken from the board rather than a hard-coded size
        return 0 <= x < len(board) and 0 <= y < len(board[x])

    def dfs(self):
        """
        Perform depth-first search to find the path to the goal.

        The search state is reset on every call, so the method can be called
        repeatedly (for example after the player has moved).

        Returns:
          A list of direction strings leading from the player's current
          position to the goal, or None if the goal cannot be reached.
        """
        # Start from a clean state; leftovers from an earlier search would
        # make the start cell look visited and end the search immediately.
        self.visited = set()
        self.stack = []
        self.solution_path = []

        # Initialize the DFS with the starting position
        self.stack.append((self.game.player_pos, []))  # (current position, path to this position)

        while self.stack:
            current_pos, path = self.stack.pop()

            # A cell can be pushed several times before it is first popped
            if current_pos in self.visited:
                continue
            self.visited.add(current_pos)

            # Check if we've reached the goal
            if current_pos == self.game.goal_pos:
                print(f"Goal found! Path: {path}")
                self.solution_path = path  # Store the solution path
                return path

            # Explore possible moves: 'up', 'down', 'left', 'right'
            x, y = current_pos
            possible_moves = {
                'up': (x - 1, y),
                'down': (x + 1, y),
                'left': (x, y - 1),
                'right': (x, y + 1)
            }

            # The stack is LIFO, so the direction pushed last is explored first
            for direction, new_pos in possible_moves.items():
                if self.valid_move(new_pos) and new_pos not in self.visited:
                    self.stack.append((new_pos, path + [direction]))

        print("No path found to the goal.")
        return None

    def play(self):
        """
        Automatically play the game using the DFS solution path.

        Runs `dfs()` and, if a path is found, replays it move by move on the game.
        """
        # Find the solution path using DFS
        path = self.dfs()
        if path is None:
            print("No valid path to the goal.")
            return

        # Move according to the path found
        for move in path:
            self.game.move(move)



In [None]:
# Testing the agent with the MazeGame: start from a fresh game so the
# player is back on the starting cell
game = MazeGame()
agent = Agent(game)

# Agent solves the maze using DFS and follows the solution path
# (dfs() prints the path it found; game.move() prints when the goal is reached)
agent.play()

Goal found! Path: ['right', 'right', 'down', 'left', 'left', 'down', 'right', 'right']
You reached the goal in 8 moves!


In [None]:
# Render the animation after the agent completes the game
# (one frame per board state stored in game.board_history)
game.render_animation()

## Homework:

Implement a search algorithm (BFS, A*, or another) to find the best path, i.e., the path with the highest reward.

In [None]:
import heapq

class Agent:
    """
    Maze solver that looks for the path to the goal with the highest total reward.

    Paths are simple (no cell is entered twice) and end as soon as they reach
    the goal. The reward of a path is the sum of `board_values` over the
    cells it enters; the starting cell itself is not counted.
    """

    # (row, column) offset for each direction, in expansion order
    _MOVES = {
        'up': (-1, 0),
        'down': (1, 0),
        'left': (0, -1),
        'right': (0, 1),
    }

    def __init__(self, maze_game):
        """
        Initialize the agent with the MazeGame instance.

        Args:
          maze_game: The game to solve; its `board_values`, `player_pos` and
            `goal_pos` are read, and `move()` is called by `play()`.
        """
        self.game = maze_game
        self.visited = set()  # Every position expanded during the last search
        self.priority_queue = []  # Priority queue for best-first search
        self.best_path = []  # Stores the best path to the goal
        self.max_value = float('-inf')  # Stays -inf while no path has been found

    def valid_move(self, position):
        """
        Check if the position lies inside the maze.

        Only the bounds are checked; revisits are ruled out per path by
        `best_first_search()`.

        Args:
          position: A (row, column) tuple.

        Returns:
          True if the position is on the board, False otherwise.
        """
        x, y = position
        values = self.game.board_values
        return 0 <= x < len(values) and 0 <= y < len(values[x])

    def best_first_search(self):
        """
        Perform best-first search to find the path to the goal with maximum value.

        Partial paths are expanded in order of accumulated reward. Each
        partial path carries its own set of visited cells, so reaching a cell
        by one route does not stop a more rewarding route from using it
        later. Every simple path to the goal is therefore considered; the
        number of such paths grows exponentially with the board size, so
        this is meant for small boards.

        Returns:
          The list of direction strings of the best path found; an empty list
          if the goal cannot be reached (or if the player is already on it).
        """
        start = self.game.player_pos
        goal = self.game.goal_pos
        values = self.game.board_values

        # Reset the state so that repeated calls search from scratch
        self.visited = set()
        self.priority_queue = []
        self.best_path = []
        self.max_value = float('-inf')

        # Entries: (negated reward, position, path, cells on this path).
        # heapq is a min-heap, so the reward is negated to pop the best first.
        heapq.heappush(self.priority_queue, (0, start, [], frozenset([start])))

        while self.priority_queue:
            neg_value, current_pos, path, on_path = heapq.heappop(self.priority_queue)
            current_value = -neg_value
            self.visited.add(current_pos)

            # A path ends at the goal; keep it if it beats the best so far
            if current_pos == goal:
                if current_value > self.max_value:
                    self.max_value = current_value
                    self.best_path = path
                continue

            # Explore possible moves: 'up', 'down', 'left', 'right'
            x, y = current_pos
            for direction, (dx, dy) in self._MOVES.items():
                new_pos = (x + dx, y + dy)
                # Only this path's own cells are off limits
                if not self.valid_move(new_pos) or new_pos in on_path:
                    continue
                new_value = current_value + values[new_pos[0]][new_pos[1]]
                heapq.heappush(
                    self.priority_queue,
                    (-new_value, new_pos, path + [direction], on_path | {new_pos}),
                )

        if self.max_value != float('-inf'):
            print(f"Best path found with value {self.max_value}: {self.best_path}")
        else:
            print("No path found to the goal.")
        return self.best_path

    def play(self):
        """
        Automatically play the game using the Best-First Search solution path.

        Runs `best_first_search()` and, if the goal is reachable, replays the
        best path move by move on the game.
        """
        # Find the best path with maximum value using Best-First Search
        path = self.best_first_search()
        # An empty path is valid when the player already stands on the goal,
        # so "no path" is detected through max_value instead.
        if self.max_value == float('-inf'):
            print("No valid path to the goal.")
            return

        # Move according to the path found
        for move in path:
            self.game.move(move)


In [None]:
# Testing the agent with the MazeGame
game = MazeGame()
agent = Agent(game)

# Agent solves the maze using best-first search (maximizing the collected
# value) and follows the solution path
agent.play()

Best path found with value 137: ['right', 'down', 'left', 'down', 'right', 'right']
You reached the goal in 6 moves!


In [None]:
# Animate the moves made while following the best path
game.render_animation()