In [1]:
from dataclasses import dataclass, field
import random
from abc import ABC, abstractmethod

In [None]:
MOVE_STR_TO_DIRECTION = {
    "up": (-1, 0),
    "down": (1, 0),
    "left": (0, -1),
    "right": (0, 1),
}

PERCEPT_INDEX_TO_STR = {
    0: 'Bad smell',
    1: 'Breeze',
    2: 'Nothing',
}

END_STATUS_TO_STR = {
    0: 'You found the treasure!',
    1: 'You fell in a pit!',
    2: 'You were eaten by monster!',
}

In [3]:
def is_in_bounds(pos: tuple, size: tuple) -> bool:
    return 0 <= pos[0] < size[0] and 0 <= pos[1] < size[1]

In [10]:
@dataclass
class DungeonRaiderState:
    grid_size: tuple
    agent_position: tuple
    monster_position: tuple | None = None
    treasure_position: tuple | None = None
    pit_of_death_positions: list[tuple] = field(default_factory=list)

    def __repr__(self) -> str:
        grid_map = [[" " for _ in range(self.grid_size[1])] for _ in range(self.grid_size[0])]
        grid_map[self.agent_position[0]][self.agent_position[1]] = "A"
        grid_map[self.monster_position[0]][self.monster_position[1]] = "M"
        grid_map[self.treasure_position[0]][self.treasure_position[1]] = "T"
        for pit in self.pit_of_death_positions:
            grid_map[pit[0]][pit[1]] = "P"
        return "\n".join(["|".join(row) for row in grid_map]) + "\n"

    def perceive(self) -> int:
        adjacent_positions = [
            (self.agent_position[0] + move[0], self.agent_position[1] + move[1])
            for move in MOVE_STR_TO_DIRECTION.values()
        ]
        for pos in adjacent_positions:
            if pos == self.monster_position:
                return 0
            if pos in self.pit_of_death_positions:
                return 1
        return 2

In [None]:
class BaseAgent(ABC):
    @abstractmethod
    def get_action(self, state: DungeonRaiderState, percept: int) -> str:
        pass

In [6]:
class DungeonRaiderEnvironment:
    def __init__(
        self,
        agent: BaseAgent,
        grid_size: tuple = (4, 4),
        agent_position: tuple = (0, 0),
        monster_position: tuple | None = None,
        treasure_position: tuple | None = None,
        pit_of_death_position: list[tuple] | None = None,
    ) -> None:
        # Random positions if needed
        left_positions = [(i, j) for i in range(grid_size[0]) for j in range(grid_size[1])]
        if monster_position is None:
            monster_position = random.choice(left_positions)
            left_positions.remove(monster_position)
        if treasure_position is None:
            treasure_position = random.choice(left_positions)
            left_positions.remove(treasure_position)
        if pit_of_death_position is None:
            pit_of_death_position = random.sample(left_positions, 2)
        # Initialize state
        self.state = DungeonRaiderState(
            grid_size, agent_position, monster_position, treasure_position, pit_of_death_position
        )

        self.agent = agent
    
    def step(self) -> int | None:
        percept = self.state.perceive()
        action = self.agent.get_action(self.state, percept)
        move = MOVE_STR_TO_DIRECTION[action]
        new_position = (
            self.state.agent_position[0] + move[0],
            self.state.agent_position[1] + move[1],
        )
        self.state.agent_position = new_position
        
        if new_position == self.state.treasure_position:
            return 0
        if new_position in self.state.pit_of_death_positions:
            return 1
        if new_position == self.state.monster_position:
            return 2
        
        return None
    
    def play(self, verbose: bool = True) -> list[DungeonRaiderState, int]:
        state_and_status = []
        while True:
            end_status = self.step()
            print(self.state)
            state_and_status.append((self.state, end_status))
            if end_status is not None:
                if verbose:
                    print(END_STATUS_TO_STR[end_status])
                break
        
        return state_and_status

In [51]:
class KnowledgeBasedAgent(BaseAgent):
    def __init__(self) -> None:
        self.agent_state = DungeonRaiderState((4, 4), (0, 0))
        self.maybe_pit_positions = []
        self.maybe_monster_positions = []
        self.clear_position = set([(0, 0)])

        self.path = [] # List of actions, if is [], the agent is not moving
        self.possible_positions = set([(1, 0), (0, 1), (0, 0)])

    def get_action(self, state: DungeonRaiderState, percept: int) -> str:
        # Move by computed path
        if len(self.path) != 0:
            return self.path.pop(0)
        
        # Update clear position
        self.clear_position.add(state.agent_position)

        # Remove current position from possible positions
        self.possible_positions.remove(state.agent_position)

        # Percept the environment
        if percept == 0: # Monster around there
            for _, move_direction in MOVE_STR_TO_DIRECTION.items():
                new_position = (
                    state.agent_position[0] + move_direction[0],
                    state.agent_position[1] + move_direction[1],
                )
                if is_in_bounds(new_position, state.grid_size) and new_position not in self.clear_position:
                    self.maybe_monster_positions.append(new_position)
        elif percept == 1: # Pit around there
            for _, move_direction in MOVE_STR_TO_DIRECTION.items():
                new_position = (
                    state.agent_position[0] + move_direction[0],
                    state.agent_position[1] + move_direction[1],
                )
                if is_in_bounds(new_position, state.grid_size) and new_position not in self.clear_position:
                    self.maybe_pit_positions.append(new_position)
        elif percept == 2: # Nothing around there
            pass

        # Add possible positions
        for _, move_direction in MOVE_STR_TO_DIRECTION.items():
            new_position = (
                state.agent_position[0] + move_direction[0],
                state.agent_position[1] + move_direction[1],
            )
            if is_in_bounds(new_position, state.grid_size) and new_position not in self.clear_position:
                self.possible_positions.add(new_position)

        # Compute new path
        for new_candidate_position in self.possible_positions:
            if new_candidate_position not in self.maybe_monster_positions and new_candidate_position not in self.maybe_pit_positions:
                self.path = self.compute_path(state.agent_position, new_candidate_position)
                if len(self.path) != 0:
                    break
        else: # If there is no clear path, go to a random possible position
            self.path = self.compute_path(state.agent_position, random.choice(list(self.possible_positions)), not_care_maybe=True)

        return self.path.pop(0)

    def compute_path(self, start_position: tuple, end_position: tuple, not_care_maybe: bool = False) -> list[str]:
        """ Find shortest path with BFS"""
        bfs_paths = [] # list of list of (paths, positions)
        bfs_paths.append(([], [start_position]))
        while len(bfs_paths) != 0:
            path, positions = bfs_paths.pop(0)
            # Return path of last position is the end position
            if positions[-1] == end_position:
                return path
            # Add new paths
            for move_str, move_direction in MOVE_STR_TO_DIRECTION.items():
                new_position = (
                    positions[-1][0] + move_direction[0],
                    positions[-1][1] + move_direction[1],
                )
                is_in_bound = is_in_bounds(new_position, self.agent_state.grid_size)
                is_visited = new_position in positions
                is_safe = new_position not in self.maybe_monster_positions and new_position not in self.maybe_pit_positions
                if is_in_bound and not is_visited and (is_safe or not_care_maybe):
                    bfs_paths.append((path + [move_str], positions + [new_position]))
            # Sort by path length
            bfs_paths = sorted(bfs_paths, key=lambda x: len(x[0]))
        return [] # If there is no path

In [63]:
envi = DungeonRaiderEnvironment(agent=KnowledgeBasedAgent(),
                                grid_size=(4, 4),
                                agent_position=(0, 0),
                                monster_position=(3, 0),
                                treasure_position=(2, 3),
                                pit_of_death_position=[(1, 1), (0, 3)])

envi.play()

 | | |P
A|P| | 
 | | |T
M| | | 

A| | |P
 |P| | 
 | | |T
M| | | 

 |A| |P
 |P| | 
 | | |T
M| | | 

 | |A|P
 |P| | 
 | | |T
M| | | 

 | | |P
 |P|A| 
 | | |T
M| | | 

 | | |P
 |P| | 
 | |A|T
M| | | 

 | | |P
 |P| | 
 |A| |T
M| | | 

 | | |P
 |P| | 
A| | |T
M| | | 

 | | |P
 |P| | 
 |A| |T
M| | | 

 | | |P
 |P| | 
 | |A|T
M| | | 

 | | |P
 |P| | 
 | | |T
M| | | 

You found the treasure!


[( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  None),
 ( | | |P
   |P| | 
   | | |T
  M| | | ,
  0)]