We had to copy this code to fix a few bugs, plus I want to make some other modifications: When the arrow was fired it would stay in a fixed location and so the loop, which concludes when the arrow goes out of bounds gets stuck in an infinite loop. Another is that pits were being generated at the agents starting point, hardly fair on the guy (or gal, or...)! 

In [124]:
from agents2 import *

# stole from SO
def powerset(s):
    x = len(s)
    masks = [1 << i for i in range(x)]
    for i in range(1 << x):
        yield [ss for mask, ss in zip(masks, s) if i & mask]

class WumpusEnvironment(XYEnvironment):
    pit_probability = 0.2  # Probability to spawn a pit in a location. (From Chapter 7.2)

    # Room should be 4x4 grid of rooms. The extra 2 for walls

    def __init__(self, width=6, height=6):
        super().__init__(width, height)
        self.init_world()

    def init_world(self):
        """Spawn items in the world based on probabilities from the book"""

        "WALLS"
        self.add_walls()

        "PITS"
        for x in range(self.x_start, self.x_end):
            for y in range(self.y_start, self.y_end):
                if random.random() < self.pit_probability:
                    if x == 1 and y == 1:
                        continue
                    self.add_thing(Pit(), (x, y), True)
                    self.add_thing(Breeze(), (x - 1, y), True)
                    self.add_thing(Breeze(), (x, y - 1), True)
                    self.add_thing(Breeze(), (x + 1, y), True)
                    self.add_thing(Breeze(), (x, y + 1), True)

        "WUMPUS"
        '''
        w_x, w_y = self.random_location_inbounds(exclude=(1, 1))
        self.add_thing(Wumpus(), (w_x, w_y), True)
        self.add_thing(Stench(), (w_x - 1, w_y), True)
        self.add_thing(Stench(), (w_x + 1, w_y), True)
        self.add_thing(Stench(), (w_x, w_y - 1), True)
        self.add_thing(Stench(), (w_x, w_y + 1), True)
        '''
        "GOLD"
        self.add_thing(Gold(), self.random_location_inbounds(exclude=(1, 1)), True)

    def get_world(self, show_walls=True):
        """Return the items in the world"""
        result = []
        x_start, y_start = (0, 0) if show_walls else (1, 1)

        if show_walls:
            x_end, y_end = self.width, self.height
        else:
            x_end, y_end = self.width - 1, self.height - 1

        for x in range(x_start, x_end):
            row = []
            for y in range(y_start, y_end):
                row.append(self.list_things_at((x, y)))
            result.append(row)
        return result

    def percepts_from(self, agent, location, tclass=Thing):
        """Return percepts from a given location,
        and replaces some items with percepts from chapter 7."""
        thing_percepts = {
            Gold: Glitter(),
            Wall: Bump(),
            Wumpus: Stench(),
            Pit: Breeze()}

        """Agents don't need to get their percepts"""
        thing_percepts[agent.__class__] = None

        """Gold only glitters in its cell"""
        if location != agent.location:
            thing_percepts[Gold] = None

        result = [thing_percepts.get(thing.__class__, thing) for thing in self.things
                  if thing.location == location and isinstance(thing, tclass)]
        return result if len(result) else [None]

    def percept(self, agent):
        """Return things in adjacent (not diagonal) cells of the agent.
        Result format: [Left, Right, Up, Down, Center / Current location]"""
        x, y = agent.location
        result = []
        result.append(self.percepts_from(agent, (x, y)))

        """The wumpus gives out a loud scream once it's killed."""
        wumpus = [thing for thing in self.things if isinstance(thing, Wumpus)]
        if len(wumpus) and not wumpus[0].alive and not wumpus[0].screamed:
            result[-1].append(Scream())
            wumpus[0].screamed = True

        return result

    def execute_action(self, agent, action):
        """Modify the state of the environment based on the agent's actions.
        Performance score taken directly out of the book."""
        if isinstance(agent, Explorer) and self.in_danger(agent):
            return
            
        agent.bump = False
        if action in ['TurnRight', 'TurnLeft', 'Forward', 'Grab']:
            super().execute_action(agent, action)
            agent.performance -= 1
        elif action == 'Climb':
            if agent.location == (1, 1):  # Agent can only climb out of (1,1)
                agent.performance += 1000 if Gold() in agent.holding else 0
                self.delete_thing(agent)
        elif action == 'Shoot':
            """The arrow travels straight down the path the agent is facing"""
            if agent.has_arrow:
                arrow_travel = agent.direction.move_forward(agent.location)
                while self.is_inbounds(arrow_travel):
                    wumpus = [thing for thing in self.list_things_at(arrow_travel)
                              if isinstance(thing, Wumpus)]
                    if len(wumpus):
                        wumpus[0].alive = False
                        break
                    arrow_travel = agent.direction.move_forward(arrow_travel)
                agent.has_arrow = False

    def in_danger(self, agent):
        """Check if Explorer is in danger (Pit or Wumpus), if he is, kill him"""
        for thing in self.list_things_at(agent.location):
            if isinstance(thing, Pit) or (isinstance(thing, Wumpus) and thing.alive):
                print("final location: ", agent.location)
                agent.alive = False
                agent.performance -= 1000
                agent.killed_by = thing.__class__.__name__
                return True
        return False

    def is_done(self):
        """The game is over when the Explorer is killed
        or if he climbs out of the cave only at (1,1)."""
        explorer = [agent for agent in self.agents if isinstance(agent, Explorer)]
        if len(explorer):
            if explorer[0].alive:
                return False
            else:
                print("Death by {} [-1000].".format(explorer[0].killed_by))
        else:
            print("Explorer climbed out {}."
                  .format("with Gold [+1000]!" if Gold() not in self.things else "without Gold [+0]"))
        return True
        

class Explorer(Agent):
    def __init__(self):
        super().__init__()
        self.holding = []
        self.has_arrow = True
        self.killed_by = ""
        self.direction = Direction("right")
        # The breeze matrix is, initially, a matrix of None. 
        # If we perceive a breeze we set that entry to True
        # if we do not (then that will not change) and so
        # we set it to False.
        self.breeze_matrix = []
        self.pit_matrix = []
        # Split the domain into three types: Other, Frontier, Known and Wall.
        self.status_matrix = []
        self.create_matrices()

    def create_matrices(self):
        self.breeze_matrix = [[None] * 6 for _ in range(6)]
        self.pit_matrix = [[0] * 6 for _ in range(6)]
        self.status_matrix = [['O'] * 6 for _ in range(6)]
        self.status_matrix[1][1] = 'K'
        self.status_matrix[0][1] = 'F'
        self.status_matrix[2][1] = 'F'
        self.status_matrix[1][0] = 'F'
        self.status_matrix[1][2] = 'F'
        
    def can_grab(self, thing):
        """Explorer can only grab gold"""
        return thing.__class__ == Gold

    def percept_location(self, location, i):
        if(i == 0):
            return [location[0] - 1, location[1]]
        if(i == 1):
            return [location[0] + 1, location[1]]
        if(i == 2):
            return [location[0], location[1] - 1]
        if(i == 3):
            return [location[0], location[1] + 1]
        if(i == 4):
            return [location[0], location[1]]
            
    def update_breeze_and_bump(self, percept):
        self.breeze_matrix[1][1] = False
        self.breeze_matrix[1][2] = True
        self.breeze_matrix[2][1] = True
        self.status_matrix[1][2] = 'K'
        self.status_matrix[2][1] = 'K'
        self.status_matrix[2][2] = 'F'
        self.status_matrix[0][2] = 'F'
        self.status_matrix[2][0] = 'F'
        self.status_matrix[1][3] = 'F'
        self.status_matrix[3][1] = 'F'
        '''
        loc = self.location
        found_breeze  = False
        for p in percept[0]:
            if(isinstance(p, Breeze)):
                self.breeze_matrix[loc[0]][loc[1]] = True
                found_breeze = True
            if(self.bump):
                move_to = self.direction.move_forward(self.location)
                self.status_matrix[move_to[0]][move_to[1]] = 'W'
        if not found_breeze:
            self.breeze_matrix[loc[0]][loc[1]] = False
        '''     

    def update_frontier(self, action):
        if action == 'Forward':
            move_to = self.direction.move_forward(self.location)
            if move_to[0] == 0 or move_to[0] == 5 or move_to[1] == 0 or move_to[1] == 5:
                return
            self.status_matrix[move_to[0]][move_to[1]] = 'K'
            if(self.status_matrix[move_to[0] + 1][move_to[1]] == 'O'):
                self.status_matrix[move_to[0] + 1][move_to[1]] = 'F'
            if(self.status_matrix[move_to[0] - 1][move_to[1]] == 'O'):
                self.status_matrix[move_to[0] - 1][move_to[1]] = 'F'           
            if(self.status_matrix[move_to[0]][move_to[1] + 1] == 'O'):
                self.status_matrix[move_to[0]][move_to[1] + 1] = 'F'
            if(self.status_matrix[move_to[0]][move_to[1] - 1] == 'O'):
                self.status_matrix[move_to[0]][move_to[1] - 1] = 'F'    

    def get_frontier(self):
        frontier = []
        for i in range(6):
            for j in range(6):
                if self.status_matrix[i][j] == 'F':
                    frontier.append((i,j))
        return frontier

    def get_breeze_squares(self):
        breeze = []
        not_breeze = []
        for i in range(6):
            for j in range(6):
                if self.breeze_matrix[i][j] == True :
                    breeze.append((i,j))
                if self.breeze_matrix[i][j] == False :
                    not_breeze.append((i,j))
        return breeze, not_breeze

    def near(self, query, candidates):
        if query in candidates:
            return True
        if query[0] > 0 and (query[0] - 1, query[1]) in candidates:
            return True
        if query[0] < 5 and (query[0] + 1, query[1]) in candidates:
            return True
        if query[1] > 0 and (query[0], query[1] - 1) in candidates:
            return True
        if query[1] < 5 and (query[0], query[1] + 1) in candidates:
            return True
        return False
    
    def calculate_probabilities(self):
        # if we learn that the square does not have a breeze
        # then it can not be near a pit
        for i in range(6):
            for j in range(6):
                if self.breeze_matrix[i][j] == False:
                    self.pit_matrix[i][j] = 0

        # If we are in the square, we can not be hovering above a pit.
        self.pit_matrix[self.location[0]][self.location[1]] = 0
        
        frontier = self.get_frontier()
        breeze, not_breeze = self.get_breeze_squares()
        index = [i for i in range(len(frontier))]

        # For each square on the frontier, we need to split the
        # frontier into a set that contains that square and a
        # set that does not.

        for sq in frontier:
            contains_sq = []
            not_contains_sq = []
            for s in powerset(index):
                sqs = [frontier[i] for i in s]
                if sq in sqs:
                    contains_sq.append(s)
                else:
                    not_contains_sq.append(s)

            print("SQUARE: ", sq)
            print("CONTAINS:") 
            for c in contains_sq:
                print([frontier[i] for i in c])

            # first pass
            fp_index_lists = []
            for p in contains_sq:
                if len(p) == 0:
                    continue
                P = [frontier[i] for i in p]
                Flags = [False for i in range(len(p))]
                for i in range(len(P)):
                    if self.near(P[i], breeze) and not self.near(P[i], not_breeze):
                        Flags[i] = True
                if Flags.count(True) == len(Flags):
                    fp_index_lists.append(p)

            # second pass
            consis_index = []
            for il in fp_index_lists:
                ilsqs = [frontier[i] for i in il]
                Flags = [False for i in range(len(breeze))]
                for i in range(len(breeze)):
                    if self.near(breeze[i], ilsqs):
                        Flags[i] = True
                if Flags.count(True) == len(Flags):
                    consis_index.append(il)
            
            print("CONSIS CONTAINS:") 
            for c in consis_index:
                print([frontier[i] for i in c])
            '''
        
            prob_with_square = 0
            prob_without_square = 0
            frontier_length = len(all_frontier) - 1
            for e in without_sq:
                #print("without sq: ", sq, " e: ", e)
                prob_without_square += 0.2 ** len(e) * 0.8 ** (frontier_length - len(e))
            for e in with_sq:
                #print("with sq: ", sq, " e: ", e)
                prob_with_square += 0.2 ** (len(e) - 1) * 0.8 ** (frontier_length - len(e) + 1)

            prob_with_square *= 0.2
            prob_without_square *= 0.8

            alpha = prob_with_square + prob_without_square

            prob_with_square /= alpha
            prob_without_square /= alpha

            if self.pit_matrix[sq[0]][sq[1]] < prob_with_square:
                self.pit_matrix[sq[0]][sq[1]] = prob_with_square
            '''

    def print_matrices(self):
        for r in self.breeze_matrix:
           print(r)
        for r in self.pit_matrix:
          print([float("{:.2f}".format(x)) for x in r])
        for r in self.status_matrix:
            print(r)
            
    # Converts percepts to actions. We NEED access to the class
    def program(self, percept):
        print(self.location, percept) 
        self.update_breeze_and_bump(percept)
        self.calculate_probabilities()
        self.print_matrices()
        action =  random.choice(['TurnLeft', 'TurnRight', 'Forward'])
        self.update_frontier(action)
        print(" I CHOOOOSE: ", action)
        return action


In [125]:
env = WumpusEnvironment()
agent = Explorer()
env.add_thing(agent, (1, 1), True)

print("-------------------------------")
world = env.get_world()
for w in world:
    print(w)
print("-------------------------------")

env.run()

-------------------------------
[[<Wall>], [<Wall>], [<Wall>], [<Wall>], [<Wall>], [<Wall>]]
[[<Wall>], [<Explorer>], [<Breeze>], [<Pit>, <Breeze>], [<Breeze>], [<Wall>]]
[[<Wall>], [<Breeze>], [<Pit>, <Breeze>], [<Breeze>, <Pit>], [<Breeze>], [<Wall>]]
[[<Wall>], [], [<Breeze>], [<Breeze>], [], [<Wall>]]
[[<Wall>], [], [], [], [<Gold>], [<Wall>]]
[[<Wall>], [<Wall>], [<Wall>], [<Wall>], [<Wall>], [<Wall>]]
-------------------------------
(1, 1) [[None]]
SQUARE:  (0, 1)
CONTAINS:
[(0, 1)]
[(0, 1), (0, 2)]
[(0, 1), (1, 0)]
[(0, 1), (0, 2), (1, 0)]
[(0, 1), (1, 3)]
[(0, 1), (0, 2), (1, 3)]
[(0, 1), (1, 0), (1, 3)]
[(0, 1), (0, 2), (1, 0), (1, 3)]
[(0, 1), (2, 0)]
[(0, 1), (0, 2), (2, 0)]
[(0, 1), (1, 0), (2, 0)]
[(0, 1), (0, 2), (1, 0), (2, 0)]
[(0, 1), (1, 3), (2, 0)]
[(0, 1), (0, 2), (1, 3), (2, 0)]
[(0, 1), (1, 0), (1, 3), (2, 0)]
[(0, 1), (0, 2), (1, 0), (1, 3), (2, 0)]
[(0, 1), (2, 2)]
[(0, 1), (0, 2), (2, 2)]
[(0, 1), (1, 0), (2, 2)]
[(0, 1), (0, 2), (1, 0), (2, 2)]
[(0, 1), (1, 3)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [24]:
2 ** 5

32