In [1]:
!pip install agentpy
# https://colab.research.google.com/drive/14n7Wpo_o9CrkdOofpko7bfyI6bsgt1xL?usp=sharing

Collecting agentpy
  Downloading agentpy-0.1.5-py3-none-any.whl.metadata (3.3 kB)
Collecting SALib>=1.3.7 (from agentpy)
  Downloading salib-1.5.1-py3-none-any.whl.metadata (11 kB)
Collecting multiprocess (from SALib>=1.3.7->agentpy)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting dill>=0.3.9 (from multiprocess->SALib>=1.3.7->agentpy)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Downloading agentpy-0.1.5-py3-none-any.whl (53 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.9/53.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading salib-1.5.1-py3-none-any.whl (778 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m778.9/778.9 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.17-py311-none-any.whl (144 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.3/144.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.9-py3-none

### Explorer agent ###

The following code simulates an explorer that crosses an environment with cells representing road, grass, water, and mountain. Address the following tasks:


1.   Propose a solution to minimize the total travel cost with the set values.
2.   Modify the solution to simulate an agent that prefers walking through grass rather than on the road.
3.   Modify the solution to simulate an agent that prefers climbing mountains to swimming.



In [2]:
import agentpy as ap
import numpy as np
import random, json
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns, IPython
from matplotlib import pyplot as plt, cm

class MazeAgent(ap.Agent):
    """Tabular Q-learning agent that moves over the grid one cell at a time.

    The agent knows four actions (up, down, left, right), keeps one Q-value
    per (cell, action) pair and selects actions with an epsilon-greedy rule.
    """

    def setup(self):
        """Bind the environment, read the learning parameters and zero the Q-table."""
        # Every action name maps to the (row, column) offset passed to the grid.
        self.actions = {'up': (-1, 0), 'down': (1, 0), 'left': (0, -1), 'right': (0, 1)}
        self.env = self.model.env
        self.reward = 0
        self.p = self.model.p

        # Learning hyper-parameters come from the model parameters.
        self.epsilon = self.p.epsilon
        self.alpha = self.p.alpha
        self.gamma = self.p.gamma

        # One entry per grid cell; each entry starts with value 0 for every action.
        n_rows, n_cols = self.env.shape[0], self.env.shape[1]
        self.Q = {
            (row, col): dict.fromkeys(self.actions, 0)
            for row in range(n_rows)
            for col in range(n_cols)
        }

    def train(self):
        """Run ``train_episodes`` learning episodes.

        An episode lasts until the agent's position equals the goal. After each
        episode the agent is moved back to the initial cell and the
        environment's ``setup`` is called again. When all episodes are done the
        accumulated reward is reset to 0 and "FINISH TRAINING" is printed.
        """
        for _ in range(self.p.train_episodes):
            current = self.p.init                      # episode starts at the initial cell
            while current != self.p.goal:
                chosen = self.execute()                # select a move and apply it
                landed = self.get_position()
                gained = self.env.get_reward(landed)   # reward of the cell just entered
                self.update_Q(current, chosen, gained, landed)
                current = landed
            self.env.move_to(self, self.p.init)
            self.env.setup()                           # re-initialise the environment
        self.reward = 0
        print("FINISH TRAINING")

    def update_Q(self, state, action, reward, new_state):
        """Apply the one-step Q-learning update to ``Q[state][action]``.

        The new value is the old value plus ``alpha`` times the difference
        between ``reward + gamma * max(Q[new_state])`` and the old value.
        """
        best_next = max(self.Q[new_state].values())
        previous = self.Q[state][action]
        target = reward + self.gamma * best_next
        self.Q[state][action] = previous + self.alpha * (target - previous)

    def execute(self):
        """Choose an action for the current cell, move, and accumulate the reward.

        Used both while training and when the model steps afterwards.

        Returns:
            The name of the action that was taken.
        """
        here = self.get_position()
        action = self.choose_action(here)
        offset = self.actions[action]
        self.env.move_by(self, offset)
        self.reward += self.env.get_reward(self.get_position())
        return action

    def get_position(self):
        """Return this agent's position as recorded by the environment."""
        positions = self.env.positions
        return positions[self]

    def choose_action(self, state):
        """Epsilon-greedy selection for ``state``.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest Q-value for ``state``.
        """
        explore = random.uniform(0, 1) < self.epsilon
        if explore:
            return random.choice(list(self.actions.keys()))
        q_row = self.Q[state]
        return max(q_row, key=q_row.get)


class Maze(ap.Grid):
    """Grid environment whose cells hold the values of the maze array.

    ``BLOCKED_CODES`` lists the cell codes treated as obstacles: -1 (road
    closure) and -10 (building), matching the value encoding documented in
    this notebook.
    """

    # Kept on the class so that the reward does not depend on short
    # notebook-level names, which a later top-level statement (for example a
    # loop variable) could rebind and silently disable the obstacle penalty.
    BLOCKED_CODES = (-1, -10)

    def setup(self):
        """Store a fresh copy of the maze array taken from the parameters."""
        self.environment = np.copy(self.p.maze)

    def get_reward(self, state):
        """Return the reward for the cell at ``state``.

        - the goal cell yields ``+goal_value``;
        - a cell whose code is in ``BLOCKED_CODES`` yields ``-goal_value``;
        - any other cell yields the negative of its stored value.
        """
        if state == self.p.goal:
            return self.p.goal_value
        cell_value = self.environment[state]
        if cell_value in self.BLOCKED_CODES:
            return -self.p.goal_value
        return -cell_value

class MazeModel(ap.Model):
    """Model holding the maze grid and a single Q-learning agent."""

    def setup(self):
        """Create the grid and the agent, train the agent, then disable exploration."""
        # Size the grid from this model's own parameters instead of a notebook
        # global, so the grid always matches the maze the model was given.
        self.env = Maze(self, shape=self.p.maze.shape)
        self.agent = MazeAgent(self)
        self.env.add_agents([self.agent], positions=[self.p.init])
        self.agent.train()
        self.agent.epsilon = 0  # epsilon 0: always take the highest-valued action

    def step(self):
        """Let the agent choose and perform one move."""
        self.agent.execute()

    def update(self):
        """Print 'ending' and stop the run once the agent is on the goal cell."""
        if self.agent.get_position() == self.p.goal:
            print('ending')
            self.stop()

    def end(self):
        """Report the agent's Q-table under the key 'Q-Table'."""
        self.report('Q-Table', self.agent.Q)




def animation_plot(model, ax):
    """Draw one animation frame: the maze with the goal and the agent overlaid.

    Args:
        model: model exposing ``p.maze``, ``p.goal`` and ``env`` (its agents
            and their positions).
        ax: matplotlib axes to draw on.
    """
    # Copy the maze held in the model's parameters so the frame always matches
    # the grid being simulated and the source array is left untouched.
    grid = np.copy(model.p.maze)
    grid[model.p.goal] = goal
    agent = list(model.env.agents)[0]
    state = model.env.positions[agent]
    # Written after the goal marker, so the agent is what shows on a shared cell.
    grid[state] = explorer

    # Cell code -> colour.
    color_dict = {
        s: '#000000',         # black
        r: '#7c4700',         # brown
        c: '#2a9dfb',         # blue
        p: '#006400',         # dark green
        i: '#d3d3d3',         # light grey
        b: '#ffffff',         # white
        explorer: '#ff0000',  # red  (agent)
        goal: '#d4af37',      # gold (goal)
    }

    ap.gridplot(grid, ax=ax, color_dict=color_dict, convert=True)
    ax.set_title("Agent Q-Learning\nTravel cost: {}\n{}: {}".format(agent.reward, state, agent.Q[state]))

# Cell codes stored in the maze array. Positive codes are the travel time in
# units; the two negative codes are documented as impossible to pass.
s = 1      # street in good condition
r = 2      # dirt road
c = 4      # street with cracks and water leaks
p = 5      # potholes
i = -1     # road closure
b = -10    # building

# Codes written onto the plotted copy of the grid to mark the agent and the goal.
explorer, goal = -101, -102

# Environment representation with a grid
maze = np.load('sample_data/streets-2.npy')

parameters = dict(
    maze=maze,
    init=(0, 6),
    goal=(21, 18),
    goal_value=1000,
    steps=200,
    train_episodes=10000,
    epsilon=0.4,
    alpha=1,
    gamma=1,
)

fig, ax = plt.subplots(figsize=(7, 7))
mazeModel = MazeModel(parameters)
# NOTE: this rebinds the name `animation`, which the import cell bound to
# the matplotlib.animation module.
animation = ap.animate(mazeModel, fig, ax, animation_plot)
IPython.display.HTML(animation.to_jshtml())

FINISH TRAINING
ending


(5 pts) Propose appropriate tools and techniques to solve the problem. Justify your selection.

En este caso estoy usando una librería llamada agentpy, que es un framework para agentes. El método de aprendizaje del agente se llama Q-learning. A través de simulaciones y "prueba y error", el agente va determinando cuál es el mejor camino para llegar a la meta con el mejor reward que pueda encontrar.

(5 pts) Clearly describe the agent and its environment, defining all necessary components (states, actions, goal, utility).

El agente está en una ciudad cuadriculada separada por calles. Las calles están en diferentes condiciones y le quitan más o menos valor al agente.

El estado del agente está determinado por su posición en la cuadrícula.

El agente solo se puede mover para arriba, abajo, derecha o izquierda.

El objetivo del agente es llegar a un estado llamado goal, marcado en amarillo en la cuadrícula.

Por cada movimiento realizado por el agente, este acumula una utilidad según la celda de la cuadrícula a la que llega. Todas las celdas le restan utilidad, excepto el estado goal.

(10 pts) Provide an implementation of the agent and an appropriate visualization of the environment.
(10 pts) Integrate your chosen tools or techniques to select the optimal path for the agent.
(5 pts) Provide the best route found with your implementation.
(5 pts) Show an animation of the agent's movement from the starting position (home) to the goal (work), illustrating the solution in action.