# Assignment 2

This notebook contains the following implementations:

* Fully Observable Environment
* Partially Observable Environment
* Simple Reflex Agent
* Model-based Reflex Agent

# Base classes

In [31]:

import math

def clip(x, lowest, highest):
    """Clamp x into the closed interval [lowest, highest].

    The upper bound is applied first and the lower bound last, so if the
    bounds are inverted (lowest > highest) the result is lowest.
    """
    capped = min(x, highest)
    return max(lowest, capped)

# Unit headings as (dx, dy) pairs, one quarter turn apart. Stepping forward
# through the list is what turn_left does; stepping backward is turn_right.
orientations = [(1, 0), (0, 1), (-1, 0), (0, -1)]


def turn_heading(heading, inc, headings=orientations):
    """Return the heading found `inc` positions after `heading` in `headings`.

    The list is treated as circular, so the index wraps around at both ends
    (negative `inc` walks backwards). Raises ValueError if `heading` is not
    in `headings`.
    """
    position = headings.index(heading)
    wrapped = (position + inc) % len(headings)
    return headings[wrapped]


def turn_right(heading):
    """Return the heading one position before `heading` in the default list."""
    return turn_heading(heading, inc=-1)


def turn_left(heading):
    """Return the heading one position after `heading` in the default list."""
    return turn_heading(heading, inc=1)


def distance(a, b):
    """Return the Euclidean distance between two (x, y) points."""
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    return math.hypot(dx, dy)


def distance2(a, b):
    """Return the squared Euclidean distance between two (x, y) points."""
    dx, dy = a[0] - b[0], a[1] - b[1]
    return dx**2 + dy**2


def vector_clip(vector, lowest, highest):
    """Clip each component of `vector` to its own [lowest, highest] bounds.

    `lowest` and `highest` are sequences parallel to `vector`; component i is
    clipped with clip(vector[i], lowest[i], highest[i]). The result is built
    with the same type as `vector`.
    """
    clipped = (clip(value, low, high)
               for value, low, high in zip(vector, lowest, highest))
    return type(vector)(clipped)

from statistics import mean

import random
import copy
import collections

# ______________________________________________________________________________


class Thing(object):

    """Any physical object that can be placed in an Environment.

    Subclass Thing to model concrete objects. An instance may carry a
    .__name__ attribute, which is used only when printing it."""

    def __repr__(self):
        # Prefer a per-instance __name__ when one was set; fall back to the class name.
        label = getattr(self, '__name__', self.__class__.__name__)
        return '<{}>'.format(label)

    def is_alive(self):
        """Return the value of .alive, or False when the thing has no such attribute."""
        return getattr(self, 'alive', False)

    def show_state(self):
        """Print the thing's internal state. Subclasses should override."""
        print("I don't know how to show_state.")

    def display(self, canvas, x, y, width, height):
        """Draw this Thing on the canvas. The base implementation does nothing."""
        pass


class Agent(Thing):

    """A Thing that acts: it has one required slot, .program.

    .program holds a function that takes one argument, the percept, and
    returns an action. (What counts as a percept or action depends on the
    specific environment in which the agent exists.)
    Note that 'program' is a slot, not a method. If it were a method,
    the program could 'cheat' and look at aspects of the agent. It is not
    supposed to do that: the program can only look at the percepts. An
    agent program that needs a model of the world (and of the agent itself)
    has to build and maintain its own model.
    There is an optional slot, .performance, which is a number giving
    the performance measure of the agent in its environment."""

    def __init__(self, program=None):
        """Create a live agent with an empty inventory and zero performance.

        program: callable taking a percept and returning an action. When it
        is None, an interactive program is installed that prints the percept
        and reads the action from the console.
        Raises AssertionError if `program` is not callable.
        """
        self.alive = True
        self.bump = False
        self.holding = []
        self.performance = 0
        if program is None:
            def program(percept):
                # NOTE(review): eval() on raw console input executes arbitrary
                # code; acceptable only for a trusted, interactive session.
                return eval(input('Percept={}; action? ' .format(percept)))
        # collections.Callable was removed in Python 3.10 (it now lives in
        # collections.abc); the callable() builtin works on every version.
        assert callable(program), "program must be callable"
        self.program = program

    def can_grab(self, thing):
        """Return True if this agent can grab `thing`.

        The base agent cannot grab anything; override in subclasses."""
        return False

class Environment(object):

    """Abstract base class for environments.

    Concrete environments inherit from this and normally implement:
        percept:           what an agent perceives.
        execute_action:    the effect of an action on the world
                           (and on the agent's .performance slot).
    The environment keeps a list of .things and a list of .agents (a subset
    of .things). Every agent has a .performance slot, initialised to 0, and
    every thing gets a .location slot when it is added, even though some
    environments may not need it."""

    def __init__(self):
        self.things = []
        self.agents = []

    def thing_classes(self):
        """Return the classes that can go into this environment (none by default)."""
        return []

    def percept(self, agent):
        """Return the percept that `agent` sees right now. (Implement this.)"""
        raise NotImplementedError

    def execute_action(self, agent, action):
        """Change the world to reflect `action` taken by `agent`. (Implement this.)"""
        raise NotImplementedError

    def default_location(self, thing):
        """Return where to place a new thing that was added without a location."""
        return None

    def exogenous_change(self):
        """Hook for spontaneous change in the world; override when needed."""
        pass

    def is_done(self):
        """Return True when no agent in the environment is alive."""
        for agent in self.agents:
            if agent.is_alive():
                return False
        return True

    def step(self):
        """Run the environment for one time step.

        Suitable when actions and exogenous changes are independent; override
        this method if they interact."""
        if self.is_done():
            return
        # Every agent chooses before any action is applied, so all of them
        # decide on the same state of the world. Dead agents contribute "".
        actions = [
            agent.program(self.percept(agent)) if agent.alive else ""
            for agent in self.agents
        ]
        for agent, action in zip(self.agents, actions):
            self.execute_action(agent, action)
        self.exogenous_change()

    def run(self, steps=1000):
        """Run at most `steps` time steps, stopping early once is_done()."""
        for _ in range(steps):
            if self.is_done():
                break
            self.step()

    def list_things_at(self, location, tclass=Thing):
        """Return every thing of class `tclass` located exactly at `location`."""
        found = []
        for thing in self.things:
            if thing.location == location and isinstance(thing, tclass):
                found.append(thing)
        return found

    def some_things_at(self, location, tclass=Thing):
        """Return True if at least one thing at `location` is an instance of
        `tclass` (or of a subclass)."""
        return len(self.list_things_at(location, tclass)) > 0

    def add_thing(self, thing, location=None):
        """Add a thing to the environment and set its location.

        For convenience, anything that is not a Thing is treated as an agent
        program and wrapped in a new Agent. (Shouldn't need to override this.)"""
        if not isinstance(thing, Thing):
            thing = Agent(thing)
        assert thing not in self.things, "Don't add the same thing twice"
        if location is None:
            location = self.default_location(thing)
        thing.location = location
        self.things.append(thing)
        if isinstance(thing, Agent):
            # agents always start with a fresh score when they enter the world
            thing.performance = 0
            self.agents.append(thing)

    def delete_thing(self, thing):
        """Remove a thing from the environment (and from the agent list).

        A thing that is not present is reported on stdout instead of raising."""
        try:
            self.things.remove(thing)
        except ValueError as e:
            remaining = [(other, other.location) for other in self.things]
            print(e)
            print("  in Environment delete_thing")
            print("  Thing to be removed: {} at {}" .format(thing, thing.location))
            print("  from list: {}" .format(remaining))
        if thing in self.agents:
            self.agents.remove(thing)

class Direction():
    '''A direction class for agents that want to move in a 2D plane.
        Usage:
            d = Direction("down")
            To change directions:
            d = d + "right" or d = d + Direction.R #Both do the same thing
            Note that the argument to __add__ must be a string and not a Direction object.
            Also, it (the argument) can only be right or left.
            The direction name must be one of the lowercase constants below;
            any other value makes __add__ and move_forward return None. '''

    R = "right"
    L = "left"
    U = "up"
    D = "down"

    def __init__(self, direction):
        self.direction = direction

    def __add__(self, heading):
        '''Return the Direction obtained by turning `heading` ("right" or "left").

        Returns None when `heading` is neither "right" nor "left", or when
        the current direction is not one of R, L, U, D.'''
        # current direction -> {turn -> resulting direction}
        turns = {
            self.R: {self.R: self.D, self.L: self.U},
            # a left turn while facing left ends up facing down (the original
            # table mapped it back to left, so left turns got stuck there)
            self.L: {self.R: self.U, self.L: self.D},
            self.U: {self.R: self.R, self.L: self.L},
            self.D: {self.R: self.L, self.L: self.R},
        }
        result = turns.get(self.direction, {}).get(heading)
        if result is None:
            return None
        return Direction(result)

    def move_forward(self, from_location):
        '''Return the (x, y) reached by moving one step from `from_location`.

        "up" decreases y and "down" increases y. Returns None when the
        current direction is not one of R, L, U, D.'''
        x, y = from_location
        offsets = {
            self.R: (1, 0),
            self.L: (-1, 0),
            self.U: (0, -1),
            self.D: (0, 1),
        }
        offset = offsets.get(self.direction)
        if offset is None:
            return None
        return (x + offset[0], y + offset[1])

# Allowed Actions

In [32]:
# Action names that the agent programs (simple reflex and model-based)
# return to the environment.
# NOTE(review): how the environment executes each action is defined outside this section.

# returned by travel() when the agent is not facing a direction that gets it closer
TURN = 'Turn'
# returned by travel() when moving one cell forward gets the agent closer to its target
ADVANCED = 'Advance'
# returned by the agent programs when there is no target left or the agent is already on it
STAY = 'Stay'

# Item classes

In [33]:
# Item class
# a Thing that carries a score value
class Item(Thing):
  # @param {Integer} points score value attached to the item (default 0)
  def __init__(self, points=0):
    self.points = points

# Trap class
# an Item worth -5 points
class Trap(Item):
  def __init__(self):
    super().__init__(points=-5)

# Gold class
# an Item worth 10 points
class Gold(Item):
  def __init__(self):
    super().__init__(points=10)

# Agent Implementation

In [34]:
# travel
# picks the next action for an agent at `origen` that wants to reach `destino`:
# advance while the current heading still brings the agent closer along that
# axis, otherwise turn
# @param (Tuple) origen: (row, col, direction)
# @param (Tuple) destino: (row, col)
# @return (String) ADVANCED or TURN (None when the direction is not recognised)
def travel(origen, destino):
  heading = origen[2]

  if heading == Direction.R:
    # moving right increases the column
    return ADVANCED if origen[1] < destino[1] else TURN

  if heading == Direction.U:
    # moving up decreases the row
    return TURN if origen[0] <= destino[0] else ADVANCED

  if heading == Direction.D:
    # moving down increases the row
    return ADVANCED if origen[0] < destino[0] else TURN

  if heading == Direction.L:
    # moving left decreases the column
    return TURN if origen[1] <= destino[1] else ADVANCED

  return None


# BaseReflexAgent class
# shared behaviour for the simple reflex and model-based reflex agents:
# heading bookkeeping, score changes and the bounds check on a 5x5 grid
class BaseReflexAgent(Agent):
  # @param {Function} program agent program (percept -> action)
  def __init__(self, program):
    super().__init__(program)
    self.currentDirection = None  # a Direction; assigned outside the constructor

  # human readable agent state: row, col and heading name
  def __str__(self):
    row, col = self.location[0], self.location[1]
    return 'Agent state: (%s, %s, %s)' % (row, col, self.currentDirection.direction)

  # modifyPerformance
  # adds `amount` (may be negative) to the agent performance
  # @param {Integer} amount
  # @return {Void}
  def modifyPerformance(self, amount):
    self.performance = self.performance + amount

  # checkBounds
  # computes the cell one step ahead of the agent
  # @return {Tuple} (row, col) when that cell is inside the 5x5 grid, else None
  def checkBounds(self):
    row, col = self.location[0], self.location[1]
    heading = self.currentDirection.direction

    # (row, col) change produced by one step in each direction;
    # an unrecognised heading leaves the position untouched
    steps = {
      Direction.U: (-1, 0),
      Direction.R: (0, 1),
      Direction.D: (1, 0),
      Direction.L: (0, -1),
    }
    rowStep, colStep = steps.get(heading, (0, 0))
    nextRow, nextCol = row + rowStep, col + colStep

    if 0 <= nextRow < 5 and 0 <= nextCol < 5:
      return (nextRow, nextCol)
    return None

  # turn
  # rotates the agent clockwise; costs 1 performance point
  # @return {Void}
  def turn(self):
    self.modifyPerformance(-1)
    self.currentDirection = self.currentDirection + Direction.R

  # forward
  # moves the agent one cell ahead; implemented by the subclasses
  # @return {Boolean}
  def forward(self):
    raise NotImplementedError

# rank
# picks the non-trap thing that is cheapest to reach, where the cost is the
# Manhattan distance plus a turning penalty that depends on the agent heading
# (ties keep the first thing found)
# @param {Tuple} origen (row, col, direction)
# @param {List<Things>} destinos
# @return {Thing} best thing, or None when there is no candidate
def rank(origen, destinos):
  # penalty[needed heading][current heading]; the values equal the number of
  # clockwise quarter turns that take the current heading to the needed one
  penalty = {
    Direction.D: {Direction.R: 1, Direction.U: 2, Direction.L: 3},
    Direction.U: {Direction.R: 3, Direction.D: 2, Direction.L: 1},
    Direction.R: {Direction.U: 1, Direction.D: 3, Direction.L: 2},
    Direction.L: {Direction.U: 3, Direction.D: 1, Direction.R: 2},
  }

  best = None
  lowestCost = 10000000
  for candidate in destinos:
    if isinstance(candidate, Trap):
      continue

    target = candidate.location
    cost = abs(target[0] - origen[0]) + abs(target[1] - origen[1])

    if target[0] > origen[0]:      # target is below
      cost += penalty[Direction.D].get(origen[2], 0)
    elif target[0] < origen[0]:    # target is above
      cost += penalty[Direction.U].get(origen[2], 0)

    if target[1] > origen[1]:      # target is to the right
      cost += penalty[Direction.R].get(origen[2], 0)
    elif target[1] < origen[1]:    # target is to the left
      cost += penalty[Direction.L].get(origen[2], 0)

    if cost < lowestCost:
      lowestCost, best = cost, candidate

  return best

# SimpleReflexAgent
# reflex agent without internal state
class SimpleReflexAgent(BaseReflexAgent):
  # forward
  # tries to move one cell ahead; the attempt always costs 1 performance point
  # if the cell ahead is inside the grid the agent location changes,
  # otherwise the location remains the same
  # @return {Boolean} True when the agent moved
  def forward(self):
    self.modifyPerformance(-1)
    ahead = self.checkBounds()

    if not ahead:
      return False
    self.location = ahead
    return True

# createSimpleReflexAgent
# factory method that creates a simple reflex agent wired to its program
# @return {SimpleReflexAgent}
def createSimpleReflexAgent():
  agent = None  # assigned below; the program reads it through the closure

  # program
  # simple reflex agent's program
  # @param {List<Things, List<Tuple>} percepts
  # @return {String} action
  def program(percepts):
    things, _cells = percepts
    row, col = agent.location[0], agent.location[1]
    state = (row, col, agent.currentDirection.direction)

    # cheapest non-trap thing to reach from the current state
    target = rank(state, things)

    # nothing left to go after: the agent stops for good
    if not target:
      agent.alive = False
      return STAY

    # already standing on the target
    if row == target.location[0] and col == target.location[1]:
      return STAY

    return travel(state, target.location)

  agent = SimpleReflexAgent(program)
  return agent


# rankUnvisited
# picks the cell that is cheapest to reach, where the cost is the Manhattan
# distance plus a turning penalty that depends on the agent heading
# (ties keep the first cell found)
# @param {Tuple} origen (row, col, direction)
# @param {List<Tuple>} notVisited
# @return {Tuple} best cell, or None when notVisited is empty
def rankUnvisited(origen, notVisited):
  # cost of reaching `cell` from origen
  def estimate(cell):
    heading = origen[2]
    total = abs(cell[0] - origen[0]) + abs(cell[1] - origen[1])

    if cell[0] > origen[0]:      # cell is below
      total += {Direction.R: 1, Direction.U: 2, Direction.L: 3}.get(heading, 0)
    elif cell[0] < origen[0]:    # cell is above
      total += {Direction.R: 3, Direction.D: 2, Direction.L: 1}.get(heading, 0)

    if cell[1] > origen[1]:      # cell is to the right
      total += {Direction.U: 1, Direction.D: 3, Direction.L: 2}.get(heading, 0)
    elif cell[1] < origen[1]:    # cell is to the left
      total += {Direction.U: 3, Direction.D: 1, Direction.R: 2}.get(heading, 0)

    return total

  closest = None
  lowest = 10000000
  for cell in notVisited:
    cost = estimate(cell)
    if cost < lowest:
      lowest, closest = cost, cell

  return closest

# rankWithNotVisited
# gets the location the agent should head to: the location of the non-trap
# thing that is cheapest to reach, or, when there is no such thing, the
# cheapest cell in notVisited
# the thing ranking is delegated to rank() instead of repeating its cost
# calculation here, so both always agree
# @param {Tuple} origen (row, col, direction)
# @param {List<Things>} destinos
# @param {List<Tuple>} notVisited
# @return {Tuple} (row, col) of the chosen target, or None when there is none
def rankWithNotVisited(origen, destinos, notVisited):
  bestThing = rank(origen, destinos)
  if bestThing is not None:
    return bestThing.location

  # no reachable thing in sight: fall back to exploring
  return rankUnvisited(origen, notVisited)

# ModelReflexAgent
# reflex agent that keeps an internal model of the grid:
#   visited: set of (row, col) cells the agent has moved into
#   visible: set of (row, col) cells the agent has perceived
class ModelReflexAgent(BaseReflexAgent):
  # initialize the internal model
  # NOTE(review): nothing in this section initialised these sets before; the
  # environment may still reassign them (e.g. to seed the start cell) - confirm
  # @param {Function} program agent program (percept -> action)
  def __init__(self, program):
    super(ModelReflexAgent, self).__init__(program)
    self.visited = set()
    self.visible = set()

  # forward
  # tries to move one cell ahead; the attempt always costs 1 performance point
  # if the cell ahead is inside the grid the agent location changes and, when
  # that cell had already been visited, 2 more performance points are lost
  # if the cell ahead is outside the grid the location remains the same
  # @return {Boolean} True when the agent moved
  def forward(self):
    self.modifyPerformance(-1)
    newLocation = self.checkBounds()

    if not newLocation:
      return False

    self.location = newLocation
    # The revisit test must happen before the cell is recorded and must compare
    # the whole (row, col) tuple: intersection(newLocation) compared the
    # visited tuples against the individual integers, so it never matched.
    if newLocation in self.visited:
      self.modifyPerformance(-2)
    self.visited.add(newLocation)
    return True


# createModelReflexAgent
# factory method that creates a model based reflex agent wired to its program
# @return {ModelReflexAgent}
def createModelReflexAgent():
  agent = None  # assigned below; the program reads it through the closure

  # program
  # model based reflex agent's program
  # @param {List<Things, List<Tuple>} percepts
  # @return {String} action
  def program(percepts):
    things, cells = percepts

    # remember every cell perceived so far
    agent.visible.update(cells)

    row, col = agent.location[0], agent.location[1]
    state = (row, col, agent.currentDirection.direction)

    # cells that were seen but not stepped on yet
    unexplored = agent.visible - agent.visited
    target = rankWithNotVisited(state, things, unexplored)

    # nothing left to go after: the agent stops for good
    if not target:
      agent.alive = False
      return STAY

    # already standing on the target
    if row == target[0] and col == target[1]:
      return STAY

    return travel(state, target)

  agent = ModelReflexAgent(program)
  return agent

# State Renderer

In [35]:
# buildCell
# builds the "(A G T)" text for one cell
#   mark '?': every slot shows '?'
#   mark 'V': the agent slot shows 'V'
#   otherwise: the agent slot shows the first letter of the agent heading
#              when the cell holds the agent
# gold and trap slots show their count, or '-' when the count is not positive
# @param {Map} cell ({A:Integer, G:Integer, T:Integer})
# @param {Agent} agent
# @param {String} mark (default -)
# @return {String}
def buildCell(cell, agent, mark = '-'):
    if mark == '?':
        return '(? ? ?)'

    # text for a counter slot
    def slot(key):
        return str(cell[key]) if cell[key] > 0 else '-'

    if mark == 'V':
        agentSlot = mark
    elif cell['A'] > 0:
        agentSlot = agent.currentDirection.direction[0:1]
    else:
        agentSlot = '-'

    return '({} {} {})'.format(agentSlot, slot('G'), slot('T'))
    
# buildHeaders
# builds the two header lines for the columns start..end-1:
# the column numbers and one "(A G T)" legend per column
# @param {Integer} start column to start (default 0)
# @param {Integer} end column to end (not inclusive, default 5)
# @return {Tuple} (indexes:String, headers:String)
def buildHeaders(start=0, end=5):
    columns = range(start, end)
    indexes = ['   %s   ' % col for col in columns]
    headers = ['(A G T)'] * len(columns)
    return ' '.join(indexes), ' '.join(headers)


# StateRenderer class
# prints the environment state, the agent's percept and the agent's internal state
class StateRenderer:
    # @param {Grid} env environment whose state is rendered (required)
    def __init__(self, env = None):
        self.env = env

    # render
    # prints the rectangle of cells between startingPoint and endPoint (both
    # inclusive), preceded by the column headers
    # @param {Tuple} startingPoint (row, col)
    # @param {Tuple} endPoint (row, col)
    # @param {Agent} agent
    # @param {Function} visibilityChecker maps (row, col) to the mark used for that cell
    # @return {Void}
    def render(self, startingPoint = (0, 0), endPoint = (4, 4), agent = None, visibilityChecker = None):
        firstRow, firstCol = startingPoint[0], startingPoint[1]
        lastRow, lastCol = endPoint[0], endPoint[1]
        margin = '  '

        indexes, headers = buildHeaders(firstCol, lastCol + 1)
        lines = [margin + indexes, margin + headers]

        for row in range(firstRow, lastRow + 1):
            rendered = []
            for col in range(firstCol, lastCol + 1):
                if visibilityChecker:
                    mark = visibilityChecker((row, col))
                else:
                    mark = '-'
                # the state is a flat list holding 5 cells per row
                cell = self.env.state[col + 5 * row]
                rendered.append(buildCell(cell = cell, agent = agent, mark = mark))
            lines.append('%s %s' % (row, ' '.join(rendered)))

        print('\n'.join(lines))

    # printEnvironment
    # prints the whole environment state
    # @param {Agent} agent
    # @param {Function} visibilityChecker
    # @return {Void}
    def printEnvironment(self, agent, visibilityChecker = None):
        self.render(agent = agent, visibilityChecker = visibilityChecker)

    # printAgentPercept
    # prints what the agent perceives: the whole grid when the environment is
    # fully observable, otherwise the rectangle spanned by the percepts
    # @param {Agent} agent
    # @param {List<Tuple>} percepts
    # @return {Void}
    def printAgentPercept(self, agent, percepts):
        print('PERCEPT')
        if self.env.isFullyObservable():
            self.printEnvironment(agent = agent)
            return

        topLeft, bottomRight = percepts[0], percepts[-1]
        here = agent.location

        # in the top left / bottom right corners the agent's own cell is the
        # corner of the rectangle, so use it as the boundary
        if here == (0, 0):
            topLeft = here
        if here == (4, 4):
            bottomRight = here

        self.render(startingPoint = topLeft, endPoint = bottomRight, agent = agent)

    # printAgentState
    # prints the agent's internal state (supported only for model based reflex agents)
    # visited cells are marked 'V' and cells never perceived are marked '?'
    # @param {Agent} agent
    # @return {Void}
    def printAgentState(self, agent):
        def checkVisibility(coords):
            if coords in agent.visited:
                return 'V'
            if coords not in agent.visible:
                return '?'
            return '-'

        print('AGENT\'S INTERNAL STATE')
        self.render(agent = agent, visibilityChecker = checkVisibility)

# Environment Implementation

In [36]:
# Available environment types
# (Grid stores one of these in self.envType and Grid.percept compares against them)
FULLY_OBSERVABLE = 'FULLY'
PARTIALLY_OBSERVABLE = 'PARTIALLY'

# randomDirection
# picks one of the four direction names at random
# @return {String}
def randomDirection():
  names = (Direction.R, Direction.D, Direction.L, Direction.U)
  pick = random.randint(0, len(names) - 1)
  return names[pick]

# createVisitedChecker
# builds a function that tells whether the agent has visited a cell
# @param {Agent} agent agent exposing a `visited` set of (row, col) cells
# @return {Function} maps (row, col) to 'V' when visited, '-' otherwise
def createVisitedChecker(agent):
  def checkVisibility(coords):
    # agent.visited is read on every call, so later visits are picked up
    return 'V' if coords in agent.visited else '-'
  return checkVisibility
# Grid class
# it is used to create fully and partially observable environments
class Grid(Environment):
    """Grid world holding agents, gold items and traps.

    The environment is either fully observable (an agent perceives every
    cell) or partially observable (an agent perceives only the in-bounds
    cells adjacent to it). Per-cell counters for agents ("A"), gold ("G")
    and traps ("T") are kept in `self.state`, a flat row-major list indexed
    by `row * MAX_WIDTH + col`.
    """

    # Relative (row, col) offsets of the eight cells surrounding an agent.
    # The order is significant: it fixes the order of the percept list.
    _NEIGHBOUR_OFFSETS = (
        (-1, -1), (-1, 0), (-1, 1),
        ( 0, -1),          ( 0, 1),
        ( 1, -1), ( 1, 0), ( 1, 1),
    )

    def __init__(self, envType = FULLY_OBSERVABLE):
        """Create an empty 5x5 grid of the given observability type.

        @param {String} envType   FULLY_OBSERVABLE or PARTIALLY_OBSERVABLE
        """
        super().__init__()
        self.MAX_WIDTH = self.MAX_HEIGHT = 5                    # number of columns / rows
        self.envType = envType                                  # observability type
        self.STEP_COUNT = 0                                     # actions executed so far
        # one independent counter dict per cell, stored row-major
        self.state = [
            {"A": 0, "G": 0, "T": 0}
            for _ in range(self.MAX_WIDTH * self.MAX_HEIGHT)
        ]
        self.stateRender = StateRenderer(self)                  # console renderer for this grid
        print('<STARTING>')

    def _cellState(self, location):
        """Return the counter dict of the cell at `location` (row, col)."""
        return self.state[location[0] * self.MAX_WIDTH + location[1]]

    def _inBounds(self, row, col):
        """Return True when (row, col) lies inside the grid."""
        return 0 <= row < self.MAX_HEIGHT and 0 <= col < self.MAX_WIDTH

    def thing_classes(self):
        """List the classes that can be placed in this environment."""
        return [ModelReflexAgent, SimpleReflexAgent, Trap, Gold]

    def percept(self, agent):
        """Compute what the agent perceives and print the current situation.

        Fully observable: every cell is visible and every thing is returned.
        Partially observable: only the in-bounds cells adjacent to the agent
        are visible, and only things located on those cells are returned.
        The agent's own cell is not among the adjacent cells.

        Side effects: prints the agent, the environment, the agent's
        performance and the agent's percept.

        @param {Agent} agent
        @return {List}, {List} things, percepts
                               things: list of visible gold and trap items
                               percepts: list of visible (row, col) cells
        """
        things = list(self.things)                              # never hand out the live list
        percepts = []

        if self.envType == FULLY_OBSERVABLE:
            # every cell is visible, listed row by row
            percepts = [
                (row, col)
                for row in range(self.MAX_HEIGHT)
                for col in range(self.MAX_WIDTH)
            ]
        elif self.envType == PARTIALLY_OBSERVABLE:
            # keep only the neighbouring cells that fall inside the grid
            for rowOffset, colOffset in self._NEIGHBOUR_OFFSETS:
                row = agent.location[0] + rowOffset
                col = agent.location[1] + colOffset
                if self._inBounds(row, col):
                    percepts.append((row, col))
            # hide the things the agent cannot currently see
            things = [thing for thing in things if thing.location in percepts]

        # agent's current state
        print(agent)
        # environment state; model-based agents also get their visited cells marked
        visibilityChecker = None
        if isinstance(agent, ModelReflexAgent):
            visibilityChecker = createVisitedChecker(agent)
        self.stateRender.printEnvironment(agent = agent, visibilityChecker = visibilityChecker)
        # agent's performance
        print('Agent performance: ' + (str(agent.performance)))
        print('\n')
        # agent's percept
        self.stateRender.printAgentPercept(agent = agent, percepts=percepts)
        print('\n')

        return things, percepts

    def add_thing(self, thing, location = None):
        """Add an Agent, Gold or Trap to the environment.

        Agents start with performance 100 and a random direction; model-based
        agents additionally get `visited` (seeded with their start cell) and
        `visible` sets. Any non-agent that is not Gold is counted as a trap.

        @param {Agent|Trap|Gold} thing      (required)
        @param {Tuple} location (row, col)  (optional, random when omitted)
        @return {Void}
        """
        thing.location = location if location is not None else self.default_location(thing)

        if isinstance(thing, Agent):
            self._cellState(thing.location)["A"] += 1
            thing.performance = 100
            thing.currentDirection = Direction(randomDirection())
            if isinstance(thing, ModelReflexAgent):
                thing.visited = {thing.location}                # the start cell counts as visited
                thing.visible = set()
            self.agents.append(thing)
            return

        counterKey = "G" if isinstance(thing, Gold) else "T"
        self._cellState(thing.location)[counterKey] += 1
        self.things.append(thing)

    def execute_action(self, agent, action):
        """Execute one agent action and consume whatever lies on its cell.

        Actions other than TURN, ADVANCED and STAY are only counted and
        printed; they have no further effect.

        @param {Agent} agent (required)
        @param {String} action (TURN|ADVANCED|STAY)
        @return {Void}
        """
        self.STEP_COUNT += 1
        # model-based agents show their internal state before each step
        if isinstance(agent, ModelReflexAgent):
            self.stateRender.printAgentState(agent)
            print('\n')
        print('<STEP %s>' % (self.STEP_COUNT))
        print('SELECT ACTION: %s' % (action))

        if action not in (TURN, ADVANCED, STAY):
            return

        if action == TURN:
            agent.turn()
        elif action == ADVANCED:
            previousLocation = agent.location
            if agent.forward():
                # Move one unit of the agent counter between the two cells.
                # Adjusting the counters (instead of overwriting them with
                # 0/1) keeps them correct when agents share a cell and matches
                # the increment done in add_thing.
                self._cellState(previousLocation)['A'] -= 1
                self._cellState(agent.location)['A'] += 1

        self.consumeThingsAtAgentLocation(agent)

    def default_location(self, thing):
        """Generate a random in-bounds location for the given thing.

        @return {Tuple} (row, col)
        """
        # the row is bounded by the height and the column by the width,
        # matching the row-major indexing of self.state
        row = random.randint(0, self.MAX_HEIGHT - 1)
        col = random.randint(0, self.MAX_WIDTH - 1)
        return (row, col)

    def consumeThingsAtAgentLocation(self, agent):
        """Consume at most one gold and one trap on the agent's cell.

        A consumed gold item adds 10 to the agent's performance and a consumed
        trap subtracts 5. Consumed items are deleted from the environment and
        the cell counters are decremented.

        @param {Agent} agent
        @return {Void}
        """
        # both lookups happen before anything is deleted
        ores = self.list_things_at(agent.location, Gold)
        traps = self.list_things_at(agent.location, Trap)

        if ores:
            ore = ores[0]
            self._cellState(ore.location)['G'] -= 1
            self.delete_thing(ore)
            agent.modifyPerformance(10)

        if traps:
            trap = traps[0]
            self._cellState(trap.location)['T'] -= 1
            self.delete_thing(trap)
            agent.modifyPerformance(-5)

    def isFullyObservable(self):
        """Return True when the environment type is FULLY_OBSERVABLE.

        @return {Boolean}
        """
        return self.envType == FULLY_OBSERVABLE

# Fully Observable Environment and Simple reflex agent

In [37]:
fullyObservableEnv = Grid(envType=FULLY_OBSERVABLE)

# scatter 4 to 8 traps, then 4 to 8 gold items, each at a random cell
for thingClass in (Trap, Gold):
    for i in range(random.randint(4, 8)):
        fullyObservableEnv.add_thing(thingClass())

# drop a simple reflex agent at a random cell and run the simulation
fullyObservableEnv.add_thing(createSimpleReflexAgent())

fullyObservableEnv.run()

<STARTING>
Agent state: (2, 1, left)
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - 1) (- - -) (- - -) (- 1 -) (- - -)
1 (- - -) (- - -) (- - 1) (- - -) (- - -)
2 (- - 1) (l - -) (- - 1) (- - -) (- - -)
3 (- - -) (- 1 -) (- 1 -) (- - -) (- - -)
4 (- - -) (- - -) (- 1 -) (- - 1) (- - -)
Agent performance: 100


PERCEPT
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - 1) (- - -) (- - -) (- 1 -) (- - -)
1 (- - -) (- - -) (- - 1) (- - -) (- - -)
2 (- - 1) (l - -) (- - 1) (- - -) (- - -)
3 (- - -) (- 1 -) (- 1 -) (- - -) (- - -)
4 (- - -) (- - -) (- 1 -) (- - 1) (- - -)


<STEP 1>
SELECT ACTION: Turn
Agent state: (2, 1, up)
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - 1) (- - -) (- - -) (- 1 -) (- - -)
1 (- - -) (- - -) (- - 1) (- - -) (- - -)
2 (- - 1) (u - -) (- - 1) (- - -) (- - -)
3 (- - -) (- 1 -) (- 1 -) (- - -) (- - -)
4 (- - -) (- - -) (- 1 -) (- - 1) (

# Partially Observable Environment and Model-based reflex agent

In [38]:
partiallyObservableEnv = Grid(envType=PARTIALLY_OBSERVABLE)

# scatter 4 to 8 traps, then 4 to 8 gold items, each at a random cell
for thingClass in (Trap, Gold):
    for i in range(random.randint(4, 8)):
        partiallyObservableEnv.add_thing(thingClass())

# drop a model-based reflex agent at a random cell and run the simulation
partiallyObservableEnv.add_thing(createModelReflexAgent())

partiallyObservableEnv.run()

<STARTING>
Agent state: (0, 1, down)
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (V - -) (- - -) (- - -) (- - 1)
1 (- - 1) (- - -) (- - -) (- 1 -) (- 1 -)
2 (- - -) (- - 2) (- - -) (- - 1) (- 1 -)
3 (- 1 -) (- - -) (- - -) (- - 1) (- - -)
4 (- - -) (- - 1) (- 2 -) (- - -) (- - -)
Agent performance: 100


PERCEPT
     0       1       2   
  (A G T) (A G T) (A G T)
0 (- - -) (d - -) (- - -)
1 (- - 1) (- - -) (- - -)


AGENT'S INTERNAL STATE
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (V - -) (- - -) (? ? ?) (? ? ?)
1 (- - 1) (- - -) (- - -) (? ? ?) (? ? ?)
2 (? ? ?) (? ? ?) (? ? ?) (? ? ?) (? ? ?)
3 (? ? ?) (? ? ?) (? ? ?) (? ? ?) (? ? ?)
4 (? ? ?) (? ? ?) (? ? ?) (? ? ?) (? ? ?)


<STEP 1>
SELECT ACTION: Advance
Agent state: (1, 1, down)
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (V - -) (- - -) (- - -) (- - 1)
1 (- - 1) (V - -) (- - -) (

# Fully Observable Environment and Model-based reflex agent

In [39]:
fullyObservableEnv = Grid(envType=FULLY_OBSERVABLE)

# scatter 4 to 8 traps, then 4 to 8 gold items, each at a random cell
for thingClass in (Trap, Gold):
    for i in range(random.randint(4, 8)):
        fullyObservableEnv.add_thing(thingClass())

# drop a model-based reflex agent at a random cell and run the simulation
fullyObservableEnv.add_thing(createModelReflexAgent())

fullyObservableEnv.run()

<STARTING>
Agent state: (3, 0, left)
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 1) (- - 1) (- - 1) (- - -)
1 (- 1 -) (- - -) (- - -) (- - -) (- - 1)
2 (- - -) (- - -) (- - -) (- - -) (- 1 -)
3 (V - -) (- 1 -) (- - -) (- - -) (- - -)
4 (- - -) (- - 1) (- - -) (- - -) (- - 1)
Agent performance: 100


PERCEPT
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 1) (- - 1) (- - 1) (- - -)
1 (- 1 -) (- - -) (- - -) (- - -) (- - 1)
2 (- - -) (- - -) (- - -) (- - -) (- 1 -)
3 (l - -) (- 1 -) (- - -) (- - -) (- - -)
4 (- - -) (- - 1) (- - -) (- - -) (- - 1)


AGENT'S INTERNAL STATE
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 1) (- - 1) (- - 1) (- - -)
1 (- 1 -) (- - -) (- - -) (- - -) (- - 1)
2 (- - -) (- - -) (- - -) (- - -) (- 1 -)
3 (V - -) (- 1 -) (- - -) (- - -) (- - -)
4 (- - -) (- - 1) (- - -) (- - -) (- - 1)


<STEP 1>
SELECT ACTIO

## Partially Observable Environment and Simple reflex agent

In [40]:
partiallyObservableEnv = Grid(envType=PARTIALLY_OBSERVABLE)

# scatter 4 to 8 traps, then 4 to 8 gold items, each at a random cell
for thingClass in (Trap, Gold):
    for i in range(random.randint(4, 8)):
        partiallyObservableEnv.add_thing(thingClass())

# drop a simple reflex agent at a random cell and run the simulation
partiallyObservableEnv.add_thing(createSimpleReflexAgent())

partiallyObservableEnv.run()

<STARTING>
Agent state: (3, 3, down)
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- 1 1) (- 1 1) (- - 1) (- 1 -) (- - 1)
1 (- - -) (- - -) (- - 1) (- - -) (- 2 -)
2 (- - -) (- - -) (- - -) (- - 1) (- - -)
3 (- - -) (- - -) (- - -) (d - -) (- 1 -)
4 (- 1 -) (- - -) (- - -) (- - 1) (- - 1)
Agent performance: 100


PERCEPT
     2       3       4   
  (A G T) (A G T) (A G T)
2 (- - -) (- - 1) (- - -)
3 (- - -) (d - -) (- 1 -)
4 (- - -) (- - 1) (- - 1)


<STEP 1>
SELECT ACTION: Turn
Agent state: (3, 3, left)
     0       1       2       3       4   
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- 1 1) (- 1 1) (- - 1) (- 1 -) (- - 1)
1 (- - -) (- - -) (- - 1) (- - -) (- 2 -)
2 (- - -) (- - -) (- - -) (- - 1) (- - -)
3 (- - -) (- - -) (- - -) (l - -) (- 1 -)
4 (- 1 -) (- - -) (- - -) (- - 1) (- - 1)
Agent performance: 99


PERCEPT
     2       3       4   
  (A G T) (A G T) (A G T)
2 (- - -) (- - 1) (- - -)
3 (- - -) (l - -) (- 1 -)
4 (- - -) (- - 1) (