In [2]:
"""
Exercise 1: Agents and Environments
"""
import random
from enum import Enum
from typing import Tuple

class Location(Enum):
    """The two squares of the vacuum world."""
    A = "A"  # square the agent moves to on Action.LEFT
    B = "B"  # square the agent moves to on Action.RIGHT

class Status(Enum):
    """Dirt state of a single square."""
    CLEAN = "Clean"
    DIRTY = "Dirty"

class Action(Enum):
    """Actions an agent can hand to VacuumEnvironment.execute."""
    LEFT = "Left"    # move to Location.A
    RIGHT = "Right"  # move to Location.B
    SUCK = "Suck"    # clean the current square if it is dirty
    NOOP = "NoOp"    # do nothing; execute() still counts the time step

class VacuumEnvironment:
    """Simple two-location vacuum environment.

    Both squares start dirty and the agent starts on square A. The
    environment keeps a running performance score and a count of the
    actions executed so far.

    Args:
        clean_reward: Points added when a SUCK action actually cleans a
            dirty square. Defaults to 10.
        move_cost: Points subtracted for every LEFT or RIGHT action.
            Defaults to 1.

    Attributes:
        locations: Mapping from each Location to its current Status.
        agent_location: Location the agent currently occupies.
        performance: Accumulated score.
        time_steps: Number of calls made to execute().
        clean_reward: Reward applied per successful clean.
        move_cost: Cost applied per movement action.
    """

    def __init__(self, *, clean_reward: int = 10, move_cost: int = 1) -> None:
        self.locations = {Location.A: Status.DIRTY, Location.B: Status.DIRTY}
        self.agent_location = Location.A
        self.performance = 0
        self.time_steps = 0
        # Kept on the instance so experiments can vary the performance
        # measure without editing execute().
        self.clean_reward = clean_reward
        self.move_cost = move_cost

    def percept(self) -> Tuple[Location, Status]:
        """Return the current percept as a (Location, Status) tuple.

        Only the square the agent is standing on is reported; the status
        of the other square is not part of the percept.
        """
        return (self.agent_location, self.locations[self.agent_location])

    def execute(self, action: Action) -> None:
        """Apply ``action`` to the environment and update the score.

        Every call advances ``time_steps`` by one. SUCK cleans the current
        square and earns ``clean_reward`` only if that square was dirty.
        LEFT and RIGHT place the agent on A and B respectively and always
        cost ``move_cost``, even when the agent is already on that square.
        Any other action (including NOOP) changes nothing else.
        """
        self.time_steps += 1

        if action is Action.SUCK:
            if self.locations[self.agent_location] is Status.DIRTY:
                self.locations[self.agent_location] = Status.CLEAN
                self.performance += self.clean_reward
        elif action is Action.LEFT:
            self.agent_location = Location.A
            self.performance -= self.move_cost
        elif action is Action.RIGHT:
            self.agent_location = Location.B
            self.performance -= self.move_cost

    def is_clean(self) -> bool:
        """Return True when every square is clean."""
        return all(status is Status.CLEAN for status in self.locations.values())

    def __str__(self) -> str:
        """Render both squares, the agent position and the score on one line."""
        return f"[{self.locations[Location.A].value}] Agent@{self.agent_location.value} [{self.locations[Location.B].value}] | Perf: {self.performance}"

# Demonstration: drive the environment by hand through a fixed plan.
print("=" * 70)
print("EXERCISE 1: Agent-Environment Interaction")
print("=" * 70)

env = VacuumEnvironment()
print(f"\nInitial state: {env}")
print(f"Current percept: {env.percept()}")
print("\nExecuting actions manually:")

# Clean A, move right to B, then clean B, reporting the state after each step.
# Action.<X>.name is the upper-case member name, e.g. "SUCK".
for action in (Action.SUCK, Action.RIGHT, Action.SUCK):
    env.execute(action)
    print(f"After {action.name}: {env}")

print(f"\nFinal Performance: {env.performance}")

EXERCISE 1: Agent-Environment Interaction

Initial state: [Dirty] Agent@A [Dirty] | Perf: 0
Current percept: (<Location.A: 'A'>, <Status.DIRTY: 'Dirty'>)

Executing actions manually:
After SUCK: [Clean] Agent@A [Dirty] | Perf: 10
After RIGHT: [Clean] Agent@B [Dirty] | Perf: 9
After SUCK: [Clean] Agent@B [Clean] | Perf: 19

Final Performance: 19


1. ## What is the difference between the agent's percept and the full environment state? Consider what information is hidden from the agent.
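    The full environment state consists of the agent's location and the clean/dirty status of both squares. The percept contains only the agent's current location and the status of the square it is standing on, so the status of the other square is hidden from the agent.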


2. ## How does the performance measure influence what actions are "good"? What would/could happen if we changed the reward/cost values?
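    The performance measure defines which actions are worthwhile: cleaning a dirty square is rewarded (+10), while each move incurs a small penalty (-1), so a good agent cleans dirt and avoids unnecessary movement. If the cost of moving were raised, especially above the reward for cleaning, an agent maximizing its score might never move at all and would leave the other square dirty.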