In [1]:
from typing import List
from enum import Enum, auto
import random

In [2]:
# Make percept a clean data container that the Environment can create and the Agent can read.
class Percept():
  """Container for everything the agent is told after one time step.

  The attributes mirror the constructor arguments one-to-one; the class adds
  no behaviour beyond a readable ``__str__``.
  """
  time_step: int
  bump : bool
  breeze : bool
  stench : bool
  scream : bool
  glitter : bool
  reward : int
  done : bool

  def __init__(self, time_step: int, bump : bool, breeze : bool, stench : bool, scream : bool, glitter : bool, reward : int, done : bool):
    """Store every argument unchanged on the instance."""
    self.time_step = time_step
    # The five sensor flags, assigned in declaration order.
    self.bump, self.breeze, self.stench, self.scream, self.glitter = (
      bump, breeze, stench, scream, glitter
    )
    self.reward, self.done = reward, done

  def __str__(self):
    """Render as ``t=… | <sensor flags> | reward=… | done=…``."""
    sensor_names = ("bump", "breeze", "stench", "scream", "glitter")
    sensors = ", ".join(f"{name}={getattr(self, name)}" for name in sensor_names)
    sections = (
      f"t={self.time_step}",
      sensors,
      f"reward={self.reward}",
      f"done={self.done}",
    )
    return " | ".join(sections)

In [3]:
# let's test the class
#perectTest=Percept(1,True,False,False,False,False,-11,False)
#print(perectTest)
#print(perectTest.bump)
#perectTest2=Percept(1,True,False,False,False,False,-11,True)
#print(perectTest2)

In [4]:
class Action(Enum):
  """The six moves an agent can submit for a single time step."""
  LEFT = auto()     # change heading via Orientation.turn_left
  RIGHT = auto()    # change heading via Orientation.turn_right
  FORWARD = auto()  # advance one cell along the current heading
  GRAB = auto()     # pick up the gold if it is in the agent's cell
  SHOOT = auto()    # fire the single arrow along the current heading
  CLIMB = auto()    # attempt to leave the cave

In [5]:
# Let's test Action
#list(Action)
#random.choice(list(Action))

In [6]:
class Orientation(Enum):
  """Compass heading of the agent."""
  E = auto()
  S = auto()
  W = auto()
  N = auto()

  def symbol(self)-> str:
    """Return the member name (e.g. ``'E'``), used when drawing the board."""
    return self.name

  def turn_right(self)->'Orientation':
    """Return the heading after a right turn: E -> S -> W -> N -> E."""
    # Built inside the method: a dict assigned in an Enum class body would
    # itself be turned into an enum member.
    successor = {
      Orientation.E: Orientation.S,
      Orientation.S: Orientation.W,
      Orientation.W: Orientation.N,
      Orientation.N: Orientation.E,
    }
    return successor[self]

  def turn_left(self)->'Orientation':
    """Return the heading after a left turn: E -> N -> W -> S -> E.

    The previous implementation evaluated ``Orientation.E`` without returning
    it, so turning left from S produced ``None``.
    """
    successor = {
      Orientation.E: Orientation.N,
      Orientation.N: Orientation.W,
      Orientation.W: Orientation.S,
      Orientation.S: Orientation.E,
    }
    return successor[self]

In [7]:
# Let's test orientation
# Orientation.N.symbol()
# print(Orientation.E.turn_right())
# print(Orientation.E.turn_left())

In [8]:
class NaiveAgent():
  """Agent that ignores its percepts and picks every action at random."""

  def choose_action(self):
    """Return one member of Action, each equally likely."""
    options = list(Action)
    return random.choice(options)

  def run(self):
    """Play one episode in a fresh Environment, printing every step.

    The environment is initialised with pit probability 0.2 and climbing
    without gold disallowed. The loop ends when a percept reports ``done``.
    """
    env = Environment()
    total_reward = 0

    def show(current):
      # Board first, then the percept the agent is about to act on.
      env.visualize()
      print("Percept:", current)

    percept = env.init(0.2, False)
    while not percept.done:
      show(percept)
      chosen = self.choose_action()
      print("Action:", chosen)
      percept = env.step(chosen)
      total_reward += percept.reward

    # Final state after the terminating step.
    show(percept)
    print('Cumulative reward:', total_reward)

In [9]:
# Let's test NaiveAgent class
# n = NaiveAgent()
# print(n.choose_action())

In [10]:
class Location():
  """Mutable (x, y) coordinate of one cell on the square game board.

  x increases to the east and y increases to the north; valid values run
  from 0 to ``GRID_SIZE - 1`` on both axes.
  """
  GRID_SIZE = 4  # cells per side; edge tests and linear indexing derive from it

  x: int
  y: int

  def __init__(self, x: int, y: int):
    self.x = x
    self.y = y

  def __str__(self):
    return f'({self.x},{self.y})'

  def is_left_of(self, location: 'Location')-> bool:
    """True if this cell is immediately west of `location` in the same row."""
    return self.x == location.x - 1 and self.y == location.y

  def is_right_of(self, location: 'Location')->bool:
    """True if this cell is immediately east of `location` in the same row."""
    return self.x == location.x + 1 and self.y == location.y

  def is_above(self, location: 'Location')->bool:
    """True if this cell is immediately north of `location` in the same column."""
    return self.x == location.x and self.y == location.y + 1

  def is_below(self, location: 'Location')->bool:
    """True if this cell is immediately south of `location` in the same column."""
    return self.x == location.x and self.y == location.y - 1

  def is_location(self, location: 'Location')->bool:
    """True if both coordinates match those of `location`."""
    return self.x == location.x and self.y == location.y

  def at_left_edge(self)->bool:
    return self.x == 0

  def at_right_edge(self)->bool:
    return self.x == self.GRID_SIZE - 1

  def at_bottom_edge(self)->bool:
    return self.y == 0

  def at_top_edge(self)->bool:
    return self.y == self.GRID_SIZE - 1

  def neighbours(self)->List['Location']:
    """Return the on-board cells adjacent to this one, ordered W, E, S, N."""
    neighbours = []
    if not self.at_left_edge():
      neighbours.append(Location(self.x-1, self.y))
    if not self.at_right_edge():
      neighbours.append(Location(self.x+1, self.y))
    if not self.at_bottom_edge():
      neighbours.append(Location(self.x, self.y-1))
    if not self.at_top_edge():
      neighbours.append(Location(self.x, self.y+1))
    return neighbours

  def set_to(self, location:'Location'):
    """Copy the coordinates of `location` into this object, in place."""
    # Assignment, not comparison: the former `==` made this method a no-op.
    self.x = location.x
    self.y = location.y

  def forward(self, orientation)->bool: #return True if bumped a wall
    """Move one cell along `orientation`, unless a wall is in the way.

    Returns True and leaves the coordinates untouched when the move would
    leave the board; otherwise updates x or y and returns False. Any
    orientation other than E, S or W is handled as N.
    """
    if orientation == Orientation.E:
      if self.at_right_edge():
        return True
      self.x +=1
      return False

    if orientation == Orientation.S:
      if self.at_bottom_edge():
        return True
      self.y -=1
      return False

    if orientation == Orientation.W:
      if self.at_left_edge():
        return True
      self.x -=1
      return False

    # orientation == Orientation.N:
    if self.at_top_edge():
      return True
    self.y +=1
    return False

  @staticmethod
  def from_linear(n:int)->'Location':
    """Convert a row-major cell index (0 is cell (0,0)) into a Location."""
    x = n % Location.GRID_SIZE
    y = n // Location.GRID_SIZE
    return Location(x,y)

  def to_linear(self)->int:
    """Return the row-major cell index; inverse of ``from_linear``."""
    return self.x + self.y * self.GRID_SIZE

  @staticmethod
  def random()->'Location':
    """Return a Location with x and y each drawn by ``random.randint`` from the board range."""
    upper = Location.GRID_SIZE - 1
    return Location(random.randint(0,upper),random.randint(0,upper))

In [11]:
# Let's test Location class
#print(Location(1,2))
#print(Location(1,2).is_left_of(Location(2,2)))  # True
#for n in Location(0,0).neighbours():
#  print(n)
#print(Location(0,0).neighbours())               # should be [(1,0), (0,1)] in some order
#loc = Location(3,0)
#print(loc.forward(Orientation.E), loc)          # True (bump), (3,0) unchanged
#loc2 = Location(1,1)
#print(loc2.forward(Orientation.N), loc2)        # False, now (1,2)

In [12]:
# Let's test Location class
# print(Location.random())
# print(Location.from_linear(15))
# print(Location(0,1).to_linear())

In [13]:
class Environment():
  """Simulator for one episode of Wumpus World on a 4x4 board.

  Call ``init`` to set up a new episode and obtain the first percept, then
  call ``step`` once per agent action until the returned percept has
  ``done`` set.
  """
  wumpus_location: Location
  wumpus_alive: bool
  agent_location: Location
  agent_orientation: Orientation
  agent_has_arrow: bool
  agent_has_gold: bool
  game_over: bool
  gold_location: Location
  pit_locations: List[Location]
  time_step: int
  pit_prob: float
  allow_climb_without_gold: bool

  def init(self, pit_prob: float, allow_climb_without_gold: bool):
    """Reset the episode, place wumpus, gold and pits, and return the first percept.

    The agent starts at (0,0) facing east with its arrow and without gold.
    """
    self.allow_climb_without_gold=allow_climb_without_gold
    self.pit_prob=pit_prob
    self.agent_location=Location(0,0)
    self.agent_orientation=Orientation.E
    self.agent_has_arrow=True
    self.agent_has_gold=False
    self.game_over=False
    self.time_step=0

    # Placement order fixes the order in which random numbers are consumed.
    self.make_wumpus()
    self.make_gold()
    self.make_pits(pit_prob)

    return self._make_percept(bump=False, scream=False, reward=0, done=self.game_over)

  def _make_percept(self, bump, scream, reward, done):
    """Build a Percept for the current time step, sampling the three sensors now."""
    return Percept(
      time_step=self.time_step,
      bump=bump,
      breeze=self.is_breeze(),
      stench=self.is_stench(),
      scream=scream,
      glitter=self.is_glitter(),
      reward=reward,
      done=done,
    )

  def make_wumpus(self):
    """Place a live wumpus on a random cell other than (0,0)."""
    self.wumpus_alive = True
    start = Location(0,0)
    candidate = Location.random()
    while candidate.is_location(start):
      candidate = Location.random()
    self.wumpus_location = candidate

  def make_gold(self):
    """Place the gold on a random cell other than (0,0)."""
    start = Location(0,0)
    candidate = Location.random()
    while candidate.is_location(start):
      candidate = Location.random()
    self.gold_location = candidate

  def make_pits(self, pit_prob):
    """Turn each cell except (0,0) into a pit with probability `pit_prob`."""
    start = Location(0,0)
    cells = (Location.from_linear(index) for index in range(16))
    # The start-cell test comes first so no random number is drawn for it.
    self.pit_locations = [
      cell for cell in cells
      if not cell.is_location(start) and random.random() < pit_prob
    ]

  def is_pit_at(self, location)->bool:
    """True if any pit occupies `location`."""
    return any(pit.is_location(location) for pit in self.pit_locations)

  def is_wumpus_at(self, location)->bool:
    """True if the wumpus (alive or dead) occupies `location`."""
    return self.wumpus_location.is_location(location)

  def is_agent_at(self, location)->bool:
    """True if the agent occupies `location`."""
    return self.agent_location.is_location(location)

  def is_gold_at(self, location)->bool:
    """True if the gold is at `location`; carried gold counts as being at the agent's cell."""
    gold_cell = self.agent_location if self.agent_has_gold else self.gold_location
    return gold_cell.is_location(location)

  def is_pit_adjacent_to_agent(self)->bool:
    """True if at least one neighbour of the agent's cell is a pit."""
    nearby = self.agent_location.neighbours()
    return any(pit.is_location(cell) for pit in self.pit_locations for cell in nearby)

  def is_wumpus_adjacent_to_agent(self)->bool:
    """True if the wumpus occupies a neighbour of the agent's cell."""
    nearby = self.agent_location.neighbours()
    return any(self.wumpus_location.is_location(cell) for cell in nearby)

  def is_agent_at_hazard(self)->bool:
    """True if the agent stands in a pit or shares a cell with a live wumpus."""
    here = self.agent_location
    if any(here.is_location(pit) for pit in self.pit_locations):
      return True
    return self.wumpus_location.is_location(here) and self.wumpus_alive

  def is_glitter(self)->bool:
    """True if the gold is in the agent's cell (always the case once it is carried)."""
    return self.is_gold_at(self.agent_location)

  def is_breeze(self)->bool:
    """True if a pit is adjacent to, or in, the agent's cell."""
    if self.is_pit_adjacent_to_agent():
      return True
    return any(pit.is_location(self.agent_location) for pit in self.pit_locations)

  def is_stench(self)->bool:
    """True if the wumpus is adjacent to, or in, the agent's cell (alive or not)."""
    if self.is_wumpus_adjacent_to_agent():
      return True
    return self.agent_location.is_location(self.wumpus_location)

  def wumpus_in_line_of_fire(self)->bool:
    """True if the wumpus lies strictly ahead of the agent along its heading.

    Any heading other than E, S or W is handled as N.
    """
    agent, wumpus = self.agent_location, self.wumpus_location
    same_row = wumpus.y == agent.y
    same_column = wumpus.x == agent.x
    heading = self.agent_orientation

    if heading == Orientation.E:
      return same_row and wumpus.x > agent.x
    if heading == Orientation.S:
      return same_column and wumpus.y < agent.y
    if heading == Orientation.W:
      return same_row and wumpus.x < agent.x
    return same_column and wumpus.y > agent.y

  def kill_attempt(self)->bool:
    """Kill the wumpus if it is alive and in the line of fire; return whether it died."""
    hit = self.wumpus_in_line_of_fire() and self.wumpus_alive
    if hit:
      self.wumpus_alive = False
    return hit

  def step(self, action: Action) -> Percept:
    """Apply `action` and return the resulting percept.

    Every step costs 1. Firing the arrow costs a further 10, climbing out at
    (0,0) with the gold earns 1000, and ending the step on a hazard costs
    1000. Once the game is over, further calls change nothing and return a
    zero-reward percept with ``done`` set.
    """
    if self.game_over:
      return self._make_percept(bump=False, scream=False, reward=0, done=True)

    self.time_step += 1
    reward, bump, scream = -1, False, False

    if action == Action.LEFT:
      self.agent_orientation = self.agent_orientation.turn_left()
    elif action == Action.RIGHT:
      self.agent_orientation = self.agent_orientation.turn_right()
    elif action == Action.FORWARD:
      bump = self.agent_location.forward(self.agent_orientation)
    elif action == Action.GRAB:
      if self.agent_location.is_location(self.gold_location):
        self.agent_has_gold = True
    elif action == Action.SHOOT:
      # Shooting without an arrow only costs the ordinary step penalty.
      if self.agent_has_arrow:
        self.agent_has_arrow = False
        reward -= 10
        scream = self.kill_attempt()
    elif action == Action.CLIMB and self.agent_location.is_location(Location(0, 0)):
      # Climbing only has an effect at the start cell.
      if self.agent_has_gold:
        reward += 1000
      if self.agent_has_gold or self.allow_climb_without_gold:
        self.game_over = True

    if not self.game_over and self.is_agent_at_hazard():
      reward -= 1000
      self.game_over = True

    return self._make_percept(bump=bump, scream=scream, reward=reward, done=self.game_over)

  def visualize(self):
    """Print the board, top row first, four symbol slots per cell.

    Slots in order: agent heading, 'P' for a pit, 'W'/'w' for a live/dead
    wumpus, 'G' for the gold. Empty slots are spaces.
    """
    for y in reversed(range(4)):
      cells = []
      for x in range(4):
        loc = Location(x, y)
        agent_mark = self.agent_orientation.symbol() if self.is_agent_at(loc) else ' '
        pit_mark = 'P' if self.is_pit_at(loc) else ' '
        if self.is_wumpus_at(loc):
          wumpus_mark = 'W' if self.wumpus_alive else 'w'
        else:
          wumpus_mark = ' '
        gold_mark = 'G' if self.is_gold_at(loc) else ' '
        cells.append(agent_mark + pit_mark + wumpus_mark + gold_mark)
      print('|' + '|'.join(cells) + '|')

In [14]:
# Let's test the Environment class
#env = Environment()
#p = env.init(0.0, True)
#env.visualize()
#print(p)
#p = env.step(Action.SHOOT)
#print(p.reward)
#p2 = env.step(Action.SHOOT)
#print(p2.reward)
#env.agent_location = Location(0,0)
#env.agent_orientation = Orientation.W
#p = env.step(Action.FORWARD)
#print(p.bump, env.agent_location)
#env = Environment()
#env.init(0.0, True)
#env.agent_location = Location(1,0)
#p = env.step(Action.CLIMB)
#print(p.done)  # should be False (not at start)


Let's start with assignment 2.

In [16]:
class MovePlanningAgent():
  """Agent that grabs the gold when it sees glitter and otherwise acts at random.

  ``has_gold`` is the agent's own belief about whether it is carrying the
  gold; it is set when the agent decides to grab.
  """
  def __init__(self):
    self.has_gold=False

  def choose_action(self, percept: Percept):
    """Return GRAB on first glitter, otherwise a random non-grab action.

    CLIMB is offered only once the agent holds the gold. Previously CLIMB was
    never chosen, so an episode could only end with the agent's death and
    would loop forever if no hazard remained on the board.
    """
    # if percept is glitter -> grab
    if percept.glitter and not self.has_gold:
      self.has_gold=True
      return Action.GRAB
    # otherwise choose another action
    candidates = [Action.LEFT, Action.RIGHT, Action.FORWARD, Action.SHOOT]
    if self.has_gold:
      # Stopgap until a real route back to (0,0) is planned: the environment
      # ignores CLIMB away from the start cell apart from the step cost.
      candidates.append(Action.CLIMB)
    return random.choice(candidates)

  def run(self):
    """Play one episode in a fresh Environment, printing every step."""
    # Start each episode without gold so the agent can be run more than once.
    self.has_gold = False
    env = Environment() # this is creating an object of env from Environment Class
    cumulative_reward = 0
    percept = env.init(0.2, False)
    while not percept.done:
      env.visualize()
      print("Percept:", percept)
      print("Has Gold:", self.has_gold)
      action = self.choose_action(percept)
      print("Action:", action)
      percept = env.step(action)
      cumulative_reward += percept.reward
    env.visualize()
    print("Percept:", percept)
    print('Cumulative reward:', cumulative_reward)