In [2]:
from typing import List
from enum import Enum, auto
import random

import copy
import numpy as np

In [3]:
class Percept:
    """What the agent senses after one time step, plus reward bookkeeping.

    Attributes:
        time_step: index of the step this percept belongs to.
        bump, breeze, stench, scream, glitter: the five boolean senses.
        reward: reward earned by the step that produced this percept.
        done: True once the episode has ended.
    """
    time_step: int
    bump: bool
    breeze: bool
    stench: bool
    scream: bool
    glitter: bool
    reward: int
    done: bool

    def __init__(self, time_step: int, bump: bool, breeze: bool, stench: bool, scream: bool, glitter: bool, reward: int, done: bool):
        # Bookkeeping fields.
        self.time_step, self.reward, self.done = time_step, reward, done
        # Sensor readings.
        self.bump, self.breeze, self.stench = bump, breeze, stench
        self.scream, self.glitter = scream, glitter

    def __str__(self):
        """Render every field on one line as ``time:<t>: name:value, ...``."""
        shown = ('bump', 'breeze', 'stench', 'scream', 'glitter', 'reward', 'done')
        details = ', '.join(f'{field}:{getattr(self, field)}' for field in shown)
        return f'time:{self.time_step}: {details}'

In [4]:
class Action(Enum):
    """The six moves an agent can make, numbered 0 through 5."""
    LEFT = 0
    RIGHT = 1
    FORWARD = 2
    GRAB = 3
    SHOOT = 4
    CLIMB = 5

    @staticmethod
    def random() -> 'Action':
        """Return one action drawn with ``random.choice`` from all members."""
        # Inside this body `random` is the module, not this method: class
        # scope is not visible from within a method.
        every_action = list(Action)
        return random.choice(every_action)

    @staticmethod
    def from_int(n: int) -> 'Action':
        """Return the action whose value is ``n``; ``ValueError`` if there is none."""
        return Action(n)

In [5]:
class Orientation(Enum):
    """Compass heading; values increase clockwise starting from east."""
    E = 0
    S = 1
    W = 2
    N = 3

    def symbol(self) -> str:
        """Return the arrow glyph ('>', 'v', '<' or '^') for this heading."""
        # The glyphs are listed in value order: E, S, W, N.
        return '>v<^'[self.value]

    def turn_right(self) -> 'Orientation':
        """Return the heading a quarter turn clockwise (E -> S -> W -> N -> E)."""
        return Orientation((self.value + 1) % 4)

    def turn_left(self) -> 'Orientation':
        """Return the heading a quarter turn counter-clockwise (E -> N -> W -> S -> E)."""
        return Orientation((self.value - 1) % 4)
        

In [6]:
class WumpusHunterAgent:
    location_x = 1
    location_y = 1
    orientation = Orientation.E
    has_gold = False
    has_arrow = True
    wumpus_alive = True
    
    def turn_left(self):
        orientation = orientation.turnLeft()
        
    def turn_right(self):
        orientation = orientation.turnRight()
        
    def forward(self):
        match orientation:
            case Orientation.E: self.location_x = min(location_x + 1, grid_width)
            case Orientation.S: self.location_y = min(location_y + 1, grid_height)
            case Orientation.W: self.location_x = max(location_x - 1, 0)
            case Orientation.N: self.location_y = max(location_y - 1, 0)
        
    def update_belief_state(action, percept):
        belief_state.append(percept)
        match action:
            case Action.LEFT: turn_left()
            case Action.RIGHT: turn_right()
            case Action.FORWARD: turn_forward()
            case Action.SHOOT:
                has_arrow = false
                if percept.scream: wumpus_alive = False
            case Action.GRAB:
                if percept.glitter: has_gold = True
            
    
    def run() -> List[Percept]:
        percept = Environment(grid_width, grid_height, pit_prob, allow_climb_without_gold)
        while not percept.done:
            action = planner.next()
            percept = environment.act(action)
            update_belief_state(action, percept)

In [7]:
class Location:
    """A mutable (x, y) cell coordinate on the 4x4 grid.

    x is 0 at the left edge and 3 at the right edge; y is 0 at the bottom
    edge and 3 at the top edge.  A cell can also be addressed by the linear
    index ``y * 4 + x``.
    """
    x: int
    y: int

    def __init__(self, x: int, y: int):
        self.x, self.y = x, y

    def __str__(self):
        return '({}, {})'.format(self.x, self.y)

    def is_left_of(self, location: 'Location')->bool:
        """True when this cell is in the same row as ``location`` with a smaller x."""
        same_row = self.y == location.y
        return same_row and self.x < location.x

    def is_right_of(self, location: 'Location')->bool:
        """True when this cell is in the same row as ``location`` with a larger x."""
        same_row = self.y == location.y
        return same_row and self.x > location.x

    def is_above(self, location: 'Location')->bool:
        """True when this cell is in the same column as ``location`` with a larger y."""
        same_column = self.x == location.x
        return same_column and self.y > location.y

    def is_below(self, location: 'Location')->bool:
        """True when this cell is in the same column as ``location`` with a smaller y."""
        same_column = self.x == location.x
        return same_column and self.y < location.y

    def neighbours(self)->List['Location']:
        """Return the adjacent cells in the order left, right, below, above."""
        # Each entry is (is the step allowed, dx, dy).
        steps = (
            (self.x > 0, -1, 0),
            (self.x < 3, 1, 0),
            (self.y > 0, 0, -1),
            (self.y < 3, 0, 1),
        )
        return [Location(self.x + dx, self.y + dy) for allowed, dx, dy in steps if allowed]

    def is_location(self, location: 'Location')->bool:
        """True when both coordinates match those of ``location``."""
        return (self.x, self.y) == (location.x, location.y)

    def at_left_edge(self) -> bool:
        return 0 == self.x

    def at_right_edge(self) -> bool:
        return 3 == self.x

    def at_top_edge(self) -> bool:
        return 3 == self.y

    def at_bottom_edge(self) -> bool:
        return 0 == self.y

    def forward(self, orientation) -> bool:
        """Move one cell along ``orientation`` in place.

        Returns:
            True (a bump) when the move would leave the grid, in which case the
            position is unchanged; False otherwise.
        """
        if orientation == Orientation.W:
            if self.at_left_edge():
                return True
            self.x -= 1
        elif orientation == Orientation.E:
            if self.at_right_edge():
                return True
            self.x += 1
        elif orientation == Orientation.N:
            if self.at_top_edge():
                return True
            self.y += 1
        elif orientation == Orientation.S:
            if self.at_bottom_edge():
                return True
            self.y -= 1
        return False

    def set_to(self, location: 'Location'):
        """Copy the coordinates of ``location`` into this object."""
        self.x, self.y = location.x, location.y

    @staticmethod
    def from_linear(n: int) -> 'Location':
        """Build the cell whose linear index is ``n`` (x = n % 4, y = n // 4)."""
        row, column = divmod(n, 4)
        return Location(column, row)

    def to_linear(self)->int:
        """Return the linear index ``y * 4 + x`` of this cell."""
        return 4 * self.y + self.x

    @staticmethod
    def random() -> 'Location':
        """Return a random cell with linear index 1..15, i.e. never (0, 0)."""
        linear_index = random.randint(1, 15)
        return Location.from_linear(linear_index)

In [8]:
class Environment:
    """Simulator for one episode on the 4x4 Wumpus World grid.

    Holds the hidden world state (wumpus, gold, pits) together with the
    agent's pose and inventory.  Call ``init`` to roll a fresh world and get
    the first percept, then ``step`` once per action.
    """
    wumpus_location: Location
    wumpus_alive: bool
    has_wumpus: bool
    agent_location: Location
    agent_orientation: Orientation
    agent_has_arrow: bool
    agent_has_gold: bool
    game_over: bool
    gold_location: Location
    pit_locations: List[Location]
    time_step: int

    def init(self, pit_prob: float, allow_climb_without_gold: bool, has_wumpus: bool):
        """Start a new episode and return the percept for time step 0.

        Args:
            pit_prob: probability that each cell other than (0, 0) holds a pit.
            allow_climb_without_gold: whether CLIMB at (0, 0) ends the episode
                even when the agent does not hold the gold.
            has_wumpus: whether the wumpus takes part in this episode.
        """
        # Episode configuration.
        self.pit_prob = pit_prob
        self.allow_climb_without_gold = allow_climb_without_gold
        self.has_wumpus = has_wumpus

        # The agent starts at (0, 0) facing east with the arrow and no gold.
        self.agent_location = Location(0, 0)
        self.agent_orientation = Orientation.E
        self.agent_has_arrow = True
        self.agent_has_gold = False

        # Random draws happen in a fixed order: wumpus, gold, then pits.
        self.make_wumpus(has_wumpus)
        self.make_gold()
        self.make_pits(pit_prob)

        self.game_over = False
        self.time_step = 0
        return Percept(
            time_step=self.time_step,
            bump=False,
            breeze=self.is_breeze(),
            stench=self.is_stench(),
            scream=False,
            glitter=False,
            reward=0,
            done=False,
        )

    def make_wumpus(self, has_wumpus: bool):
        """Draw a random wumpus cell; the wumpus starts alive only if ``has_wumpus``."""
        self.wumpus_location = Location.random()
        self.wumpus_alive = has_wumpus

    def make_gold(self):
        """Draw a random cell for the gold."""
        self.gold_location = Location.random()

    def make_pits(self, pit_prob: float):
        """Give each cell with linear index 1..15 a pit with probability ``pit_prob``."""
        self.pit_locations = [
            Location.from_linear(index)
            for index in range(1, 16)
            if random.random() < pit_prob
        ]

    # ----- occupancy queries -------------------------------------------------

    def is_pit_at(self, location: Location) -> bool:
        """True when one of the pits is at ``location``."""
        for pit in self.pit_locations:
            if pit.is_location(location):
                return True
        return False

    def is_wumpus_at(self, location: Location) -> bool:
        """True when the episode has a wumpus and it is at ``location``, dead or alive."""
        if not self.has_wumpus:
            return False
        return self.wumpus_location.is_location(location)

    def is_agent_at(self, location: Location) -> bool:
        """True when the agent is at ``location``."""
        return self.agent_location.is_location(location)

    def is_gold_at(self, location: Location) -> bool:
        """True when the gold is at ``location``."""
        return self.gold_location.is_location(location)

    def is_pit_adjacent_to_agent(self) -> bool:
        """True when at least one neighbour of the agent's cell holds a pit."""
        return any(self.is_pit_at(cell) for cell in self.agent_location.neighbours())

    def is_wumpus_adjacent_to_agent(self) -> bool:
        """True when the episode has a wumpus and it is in a neighbouring cell."""
        if not self.has_wumpus:
            return False
        return any(cell.is_location(self.wumpus_location) for cell in self.agent_location.neighbours())

    def is_agent_at_hazard(self)->bool:
        """True when the agent shares its cell with a pit or a living wumpus."""
        here = self.agent_location
        if self.is_pit_at(here):
            return True
        return self.is_wumpus_at(here) and self.wumpus_alive

    # ----- senses ------------------------------------------------------------

    def is_glitter(self) -> bool:
        """True when the gold is in the agent's cell."""
        return self.is_gold_at(self.agent_location)

    def is_breeze(self) -> bool:
        """True when a pit is next to, or in, the agent's cell."""
        return self.is_pit_adjacent_to_agent() or self.is_pit_at(self.agent_location)

    def is_stench(self) -> bool:
        """True when the wumpus is next to, or in, the agent's cell.

        ``wumpus_alive`` is not consulted, so a dead wumpus still smells.
        """
        return self.is_wumpus_adjacent_to_agent() or self.is_wumpus_at(self.agent_location)

    # ----- shooting ----------------------------------------------------------

    def wumpus_in_line_of_fire(self) -> bool:
        """True when the wumpus lies straight ahead of the agent in its row or column."""
        shooter, target = self.agent_location, self.wumpus_location
        heading = self.agent_orientation
        # Pick the relation that puts the wumpus ahead of the agent.
        if heading == Orientation.E:
            aligned = shooter.is_left_of
        elif heading == Orientation.S:
            aligned = shooter.is_above
        elif heading == Orientation.W:
            aligned = shooter.is_right_of
        elif heading == Orientation.N:
            aligned = shooter.is_below
        else:
            return None
        return self.has_wumpus and aligned(target)

    def kill_attempt(self) -> bool:
        """Fire along the agent's heading; return True (scream) if the wumpus dies."""
        if not self.has_wumpus or not self.wumpus_alive:
            return False
        scream = self.wumpus_in_line_of_fire()
        self.wumpus_alive = not scream
        return scream

    # ----- dynamics ----------------------------------------------------------

    def step(self, action: Action) -> Percept:
        """Apply ``action`` and return the resulting percept.

        While the episode is running every step costs 1; on top of that,
        walking into a pit or a living wumpus adds -1000 and ends the episode,
        firing the arrow adds -10, and climbing out at (0, 0) with the gold
        adds +1000.  Once the episode is over the action is ignored and the
        reward is 0.  ``time_step`` advances on every call.
        """
        bump = False
        scream = False
        if self.game_over:
            reward = 0
        else:
            bonus = 0
            if action == Action.LEFT:
                self.agent_orientation = self.agent_orientation.turn_left()
            elif action == Action.RIGHT:
                self.agent_orientation = self.agent_orientation.turn_right()
            elif action == Action.FORWARD:
                bump = self.agent_location.forward(self.agent_orientation)
                # Gold that has been grabbed travels with the agent.
                if self.agent_has_gold:
                    self.gold_location.set_to(self.agent_location)
                if self.is_agent_at_hazard():
                    bonus = -1000
                    self.game_over = True
            elif action == Action.GRAB:
                if self.agent_location.is_location(self.gold_location):
                    self.agent_has_gold = True
            elif action == Action.SHOOT:
                # Only the first shot counts; without an arrow nothing happens.
                if self.agent_has_arrow:
                    scream = self.kill_attempt()
                    bonus = -10
                    self.agent_has_arrow = False
            elif action == Action.CLIMB:
                at_exit = self.agent_location.is_location(Location(0, 0))
                if at_exit and self.agent_has_gold:
                    bonus = 1000
                    self.game_over = True
                elif at_exit and self.allow_climb_without_gold:
                    self.game_over = True
            reward = bonus - 1

        breeze = self.is_breeze()
        stench = self.is_stench()
        glitter = self.is_glitter()
        self.time_step += 1
        return Percept(self.time_step, bump, breeze, stench, scream, glitter, reward, self.game_over)

    def visualize(self):
        """Print the board, top row (y = 3) first.

        Each cell shows four characters: the agent's heading arrow, 'P' for a
        pit, 'W'/'w' for a living/dead wumpus, and 'G' for the gold; a blank
        stands for anything absent.
        """
        for y in reversed(range(4)):
            cells = []
            for x in range(4):
                loc = Location(x, y)
                agent_mark = self.agent_orientation.symbol() if self.is_agent_at(loc) else ' '
                pit_mark = 'P' if self.is_pit_at(loc) else ' '
                if self.has_wumpus and self.is_wumpus_at(loc):
                    wumpus_mark = 'W' if self.wumpus_alive else 'w'
                else:
                    wumpus_mark = ' '
                gold_mark = 'G' if self.is_gold_at(loc) else ' '
                cells.append(agent_mark + pit_mark + wumpus_mark + gold_mark)
            print('|' + '|'.join(cells) + '|')
    

In [9]:
class NaiveAgent:
    """Baseline agent that ignores its percepts and picks random actions."""

    def choose_action(self):
        """Return the next action, taken from ``Action.random()``."""
        return Action.random()

    def _report(self, env, percept):
        """Print the board followed by the given percept."""
        env.visualize()
        print('Percept:', percept)

    def run(self):
        """Play one episode, printing the board, percept and action each step.

        The world is created with pit probability 0.2, climbing without the
        gold disallowed, and a wumpus present.  The cumulative reward is
        printed once the episode is done.
        """
        world = Environment()
        percept = world.init(0.2, False, True)
        total_reward = 0
        while not percept.done:
            self._report(world, percept)
            chosen = self.choose_action()
            # A blank line before and after the action line.
            print(f'\nAction: {chosen!s}\n')
            percept = world.step(chosen)
            total_reward = total_reward + percept.reward
        self._report(world, percept)
        print('Cumulative reward:', total_reward)

In [10]:
# Play one episode with the random agent; the board, percept and chosen action
# are printed at every step, so the output differs from run to run.
NaiveAgent().run()

|    |    |  W |    |
|    |    | P  |    |
|   G| P  |    | P  |
|>   |    |    |    |
Percept: time:0: bump:False, breeze:False, stench:False, scream:False, glitter:False, reward:0, done:False

Action: Action.CLIMB

|    |    |  W |    |
|    |    | P  |    |
|   G| P  |    | P  |
|>   |    |    |    |
Percept: time:1: bump:False, breeze:False, stench:False, scream:False, glitter:False, reward:-1, done:False

Action: Action.LEFT

|    |    |  W |    |
|    |    | P  |    |
|   G| P  |    | P  |
|^   |    |    |    |
Percept: time:2: bump:False, breeze:False, stench:False, scream:False, glitter:False, reward:-1, done:False

Action: Action.LEFT

|    |    |  W |    |
|    |    | P  |    |
|   G| P  |    | P  |
|<   |    |    |    |
Percept: time:3: bump:False, breeze:False, stench:False, scream:False, glitter:False, reward:-1, done:False

Action: Action.RIGHT

|    |    |  W |    |
|    |    | P  |    |
|   G| P  |    | P  |
|^   |    |    |    |
Percept: time:4: bump:False, breeze:Fal