In [None]:
from agents import Environment, Thing, Agent
from collections import namedtuple
from copy import deepcopy
from random import randrange

# Immutable (x, y) grid coordinate: x is the column index, y is the row index.
Position = namedtuple("Position", ["x", "y"])


class CaveEnvironment(Environment):
    """Rectangular cave grid holding treasures and traps for exploring agents.

    The grid is stored column-first: ``self.matrix[x][y]`` is the content of
    column ``x``, row ``y``; an empty cell holds ``None``.  ``self.treasures``
    counts the items with a positive value that are still on the grid, and
    the simulation is finished once that counter reaches zero.
    """

    def __init__(self, map_items, height=6, width=6, agent=None):
        """Create the grid and place every entry of *map_items* on it.

        Args:
            map_items: iterable of dicts with the keys ``"x"``, ``"y"`` and
                ``"item"``; the item is stored at ``matrix[x][y]``.
            height: number of rows (length of each column list).
            width: number of columns (length of ``self.matrix``).
            agent: optional agent appended to ``self.agents``.
        """
        super().__init__()
        self.width = width
        self.height = height
        self.matrix = [[None for _ in range(height)] for _ in range(width)]
        self.steps = 0
        self.treasures = 0
        if agent is not None:
            self.agents.append(agent)
        for thing in map_items:
            if self._is_treasure(thing["item"]):
                self.treasures += 1
            self.matrix[thing["x"]][thing["y"]] = thing["item"]

    @staticmethod
    def _is_treasure(item):
        """Return True when *item* is a ``MyThing`` with a positive value."""
        return isinstance(item, MyThing) and item.value() > 0

    def percept(self, agent):
        """Return what *agent* perceives from its current cell.

        Returns:
            A tuple ``(Position(x, y), cells)`` where ``cells`` lists the
            content of the agent's own cell followed by the cells at
            ``x - 1``, ``x + 1``, ``y - 1`` and ``y + 1``.  Neighbours that
            fall off the grid are reported as ``Outside()`` instances.
        """
        def get_item(x, y):
            # Negative indices would silently wrap around in Python lists,
            # so they are rejected before indexing.
            if x < 0 or y < 0:
                return Outside()
            try:
                return self.matrix[x][y]
            except IndexError:
                return Outside()

        x = agent.x
        y = agent.y
        return (Position(x, y), [self.matrix[x][y],
                                 get_item(x - 1, y),
                                 get_item(x + 1, y),
                                 get_item(x, y - 1),
                                 get_item(x, y + 1), ])

    def execute_action(self, agent, action):
        """Apply *action* (``"Move <Direction>"`` or ``"Take"``) for *agent*.

        ``"Take"`` removes the ``MyThing`` in the agent's cell and hands it to
        ``agent.take``.  The treasure counter is only decremented when the
        removed item is a treasure, so picking up a trap does not end the
        simulation early.  Taking from a cell without a ``MyThing`` is a
        no-op.  Any other action string is ignored.
        """
        if "Move" in action:
            agent.move(action.split(" ")[1])
        elif action == "Take":  # equality, not identity: `is` relies on interning
            item = self.matrix[agent.x][agent.y]
            if not isinstance(item, MyThing):
                # Nothing with a value to pick up (empty cell, wall, ...).
                return
            self.matrix[agent.x][agent.y] = None
            if self._is_treasure(item):
                self.treasures -= 1
            agent.take(item)

    def print_board(self, agent=None):
        """Print the whole grid, marking *agent*'s cell with ``A`` if given."""
        print("Environment:")
        if agent is None:
            m = self.matrix
        else:
            # Copy only the column lists: the overlay must not touch
            # self.matrix, but the items themselves are never modified.
            m = [list(column) for column in self.matrix]
            m[agent.x][agent.y] = agent
        # zip(*m) transposes the column-first matrix so each printed line
        # is one row (constant y).
        for row in zip(*m):
            print("-" * (len(row) * 2 + 1))
            for item in row:
                if isinstance(item, Agent):
                    label = "A"
                elif isinstance(item, Thing):
                    label = item.label()
                else:
                    label = "-"

                print("|{}".format(label), end="")
            print("|")
        print("-" * (len(m) * 2 + 1))

    def print_percept(self, percept, new_pos=None):
        """Print the grid as known from *percept*; unseen cells show ``?``.

        Args:
            percept: value previously returned by :meth:`percept`.
            new_pos: when given, the agent marker is drawn at this position
                and the percept's origin cell shows the current content of
                ``self.matrix`` instead.
        """
        m = [[Unknown() for _ in range(self.height)] for _ in range(self.width)]
        x, y = percept[0]
        percepts = percept[1]
        # The empty string is a placeholder that is rendered as "A" below.
        if new_pos is None:
            m[x][y] = ""
        else:
            m[x][y] = self.matrix[x][y]

        neighbours = ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1))
        for (nx, ny), seen in zip(neighbours, percepts[1:5]):
            # Off-grid neighbours have no cell to draw into.
            if not isinstance(seen, Outside):
                m[nx][ny] = seen

        if new_pos is not None:
            m[new_pos.x][new_pos.y] = ""

        print("Agent percept:")
        for row in zip(*m):
            print("-" * (len(row) * 2 + 1))
            for item in row:
                if isinstance(item, Thing):
                    label = item.label()
                elif item is None:
                    label = "-"
                else:
                    label = "A"

                print("|{}".format(label), end="")
            print("|")
        # Rows are `width` cells wide, so the closing border is sized from
        # the number of columns (same as print_board), not from the height.
        print("-" * (len(m) * 2 + 1))

    def step(self):
        """Run one simulation step for every agent, printing a trace.

        For each agent the board and percept are printed, the agent's
        program is asked for an action, the action is executed, and the
        board is printed again.  The "AFTER" percept view reuses the
        percept taken before the action, with the agent drawn at its new
        position.  Does nothing but print a message once :meth:`is_done`
        is true.
        """
        if self.is_done():
            print("Finished at step {}".format(self.steps))
            return

        self.steps += 1
        for i, agent in enumerate(self.agents):
            print("\n")
            print("-" * 30)
            print("Agent {}, step {}".format(i, self.steps))
            print("-" * 30)

            print("BEFORE")
            self.print_board(agent)
            percept = self.percept(agent)
            self.print_percept(percept)
            print("Agent performance: {}".format(agent.performance))

            print("ACTION")
            print("Percept: {}".format(percept))
            action = agent.program(percept)
            print("Action: {}".format(action))
            self.execute_action(agent, action)

            print("AFTER")
            self.print_board(agent)
            self.print_percept(percept, new_pos=Position(agent.x, agent.y))
            print("Agent performance: {}".format(agent.performance))

    def is_done(self):
        """Return True once no treasures remain on the grid."""
        return self.treasures <= 0


class Explorer(Agent):
    """Agent that keeps its own grid coordinates and a performance score."""

    def __init__(self, position=None, program=None):
        """Place the explorer at *position* (defaults to x=0, y=0).

        Args:
            position: object with ``x`` and ``y`` attributes, or ``None``.
            program: passed on to the ``Agent`` constructor.
        """
        super().__init__(program)
        self.x, self.y = (0, 0) if position is None else (position.x, position.y)
        self.performance = 0

    def move(self, direction):
        """Shift one cell in *direction*; every call costs one point.

        Recognised directions are ``"Up"`` (y - 1), ``"Down"`` (y + 1),
        ``"Right"`` (x + 1) and ``"Left"`` (x - 1).  Any other value leaves
        the position unchanged but is still charged.
        """
        self.performance -= 1
        offsets = {
            "Up": (0, -1),
            "Down": (0, 1),
            "Right": (1, 0),
            "Left": (-1, 0),
        }
        delta_x, delta_y = offsets.get(direction, (0, 0))
        self.x += delta_x
        self.y += delta_y

    def take(self, item):
        """Add the value of *item* to the performance score."""
        self.performance = self.performance + item.value()


class MyThing(Thing):
    """Base class for cave items that have a score value and a board label.

    Subclasses must override both methods.
    """

    def value(self):
        """Return the score change applied when an agent takes this item."""
        raise NotImplementedError
    
    def label(self):
        """Return the string used to draw this item on the printed board."""
        raise NotImplementedError


class Diamond(MyThing):
    """Item worth 20 points, drawn as "D"."""

    def value(self):
        """Return the diamond's value (20)."""
        return 20

    def label(self):
        """Return the board symbol "D"."""
        return "D"


class Idol(MyThing):
    """Item worth 10 points, drawn as "I"."""

    def value(self):
        """Return the idol's value (10)."""
        return 10

    def label(self):
        """Return the board symbol "I"."""
        return "I"


class ArrowTrap(MyThing):
    """Item with a negative value of -5, drawn as "T"."""

    def value(self):
        """Return the arrow trap's value (-5)."""
        return -5

    def label(self):
        """Return the board symbol "T"."""
        return "T"


class FloorTrap(MyThing):
    """Item with a negative value of -8, drawn as "F"."""

    def value(self):
        """Return the floor trap's value (-8)."""
        return -8

    def label(self):
        """Return the board symbol "F"."""
        return "F"


class Outside(Thing):
    """Marker that CaveEnvironment.percept reports for cells beyond the grid."""

    def label(self):
        """Return the board symbol "O"."""
        return "O"


class Wall(Thing):
    """Wall cell; `program` never offers it as a move target."""

    def label(self):
        """Return the board symbol "W"."""
        return "W"


class Unknown(Thing):
    """Placeholder that print_percept uses for cells the agent has not perceived."""

    def label(self):
        """Return the board symbol "?"."""
        return "?"


def program(percepts):
    """Pick the action with the best immediate payoff for the given percept.

    Args:
        percepts: a ``(position, cells)`` pair; ``cells[0]`` is the agent's
            own cell and the following entries are its neighbours, in the
            index order understood by ``choose_action``.

    Returns:
        An action string.  Scores are: -11 for stepping ``Outside``, the
        item's value minus one for a ``MyThing``, and -1 for an empty
        neighbouring cell.  Ties between the best-scoring actions are broken
        at random.  ``"NoOp"`` is returned when the agent stands on a
        ``Wall`` or when no candidate action exists.
    """
    candidates = []

    def offer(slot, score):
        candidates.append({"action": choose_action(slot), "value": score})

    for slot, seen in enumerate(percepts[1]):
        if slot == 0 and isinstance(seen, Wall):
            return "NoOp"
        if isinstance(seen, Outside):
            offer(slot, -11)
        if isinstance(seen, MyThing):
            offer(slot, seen.value() - 1)
        elif seen is None and slot != 0:
            offer(slot, -1)

    if not candidates:
        return "NoOp"

    best_score = max(entry["value"] for entry in candidates)
    best_entries = [entry for entry in candidates if entry["value"] == best_score]
    return best_entries[randrange(len(best_entries))]["action"]


def choose_action(index):
    """Translate a percept slot index into the matching action string.

    Slot 0 is the agent's own cell ("Take"); slots 1-4 map to moving Left,
    Right, Up and Down.  Any other index yields ``None``.
    """
    actions = ("Take", "Move Left", "Move Right", "Move Up", "Move Down")
    for slot, action in enumerate(actions):
        if index == slot:
            return action
    return None


# Demo cave layout: one entry per occupied cell (x = column, y = row).
items = [
    dict(x=1, y=1, item=Idol()),
    dict(x=1, y=2, item=FloorTrap()),
    dict(x=2, y=2, item=Idol()),
    dict(x=3, y=2, item=ArrowTrap()),
    dict(x=3, y=3, item=Diamond()),
    dict(x=3, y=1, item=FloorTrap()),
    dict(x=4, y=4, item=Idol()),
    dict(x=3, y=4, item=ArrowTrap()),
    dict(x=5, y=4, item=Diamond()),
    dict(x=4, y=5, item=ArrowTrap()),
    dict(x=5, y=5, item=ArrowTrap()),
]
# The explorer starts in column 1 of the top row and is driven by `program`.
exp = Explorer(position=Position(x=1, y=0), program=program)
a = CaveEnvironment(map_items=items, agent=exp)

# Run the simulation with a step argument of 100.
a.run(100)