In [2]:
"""
Exercise 1: Agents and Environments
"""
import random
from enum import Enum
from typing import Tuple

class Location(Enum):
    """The two squares of the vacuum world.

    Each member's value is the label shown when the environment is printed.
    """
    A = "A"
    B = "B"

class Status(Enum):
    """Cleanliness of a single square.

    Each member's value is the label shown when the environment is printed.
    """
    CLEAN = "Clean"
    DIRTY = "Dirty"

class Action(Enum):
    """Actions an agent can submit to ``VacuumEnvironment.execute``."""
    LEFT = "Left"    # places the agent on square A (costs 1 performance point)
    RIGHT = "Right"  # places the agent on square B (costs 1 performance point)
    SUCK = "Suck"    # cleans the current square if it is dirty (+10 points)
    NOOP = "NoOp"    # not handled by any branch of execute: only the step counter advances

class VacuumEnvironment:
    """Two-square vacuum world (squares A and B) containing one agent.

    Attributes:
        locations: Maps each ``Location`` to its current ``Status``.
        agent_location: The square the agent currently occupies.
        performance: Running score (+10 per square cleaned, -1 per move).
        time_steps: Number of ``execute`` calls made so far.
    """

    def __init__(self):
        # Both squares start dirty; the agent starts on A with a zero score.
        self.locations = dict.fromkeys((Location.A, Location.B), Status.DIRTY)
        self.agent_location = Location.A
        self.performance = 0
        self.time_steps = 0

    def percept(self) -> Tuple[Location, Status]:
        """Return what the agent senses: (its location, that square's status)."""
        here = self.agent_location
        return (here, self.locations[here])

    def execute(self, action: Action):
        """Apply ``action`` to the world, updating state, score and step count.

        Every call counts as one time step, including SUCK on an already
        clean square and actions that match no branch (e.g. NOOP).
        """
        self.time_steps += 1

        if action == Action.SUCK:
            here = self.agent_location
            # Only an actual clean-up is rewarded.
            if self.locations[here] == Status.DIRTY:
                self.locations[here] = Status.CLEAN
                self.performance += 10
            return

        if action == Action.LEFT:
            destination = Location.A
        elif action == Action.RIGHT:
            destination = Location.B
        else:
            # NOOP (or anything unrecognised): nothing else changes.
            return

        # A move is charged even when the agent is already on the target square.
        self.agent_location = destination
        self.performance -= 1

    def is_clean(self) -> bool:
        """Return True when every square's status is CLEAN."""
        return not any(
            status != Status.CLEAN for status in self.locations.values()
        )

    def __str__(self):
        """Render as ``[A status] Agent@<square> [B status] | Perf: <score>``."""
        status_a = self.locations[Location.A].value
        status_b = self.locations[Location.B].value
        agent_at = self.agent_location.value
        return f"[{status_a}] Agent@{agent_at} [{status_b}] | Perf: {self.performance}"

# Demonstration: drive the environment by hand with a fixed action sequence
print("=" * 70, "EXERCISE 1: Agent-Environment Interaction", "=" * 70, sep="\n")

env = VacuumEnvironment()
print(f"\nInitial state: {env}")
print(f"Current percept: {env.percept()}")
print("\nExecuting actions manually:")

# Clean A, move to B, clean B; show the world after each action.
for action in (Action.SUCK, Action.RIGHT, Action.SUCK):
    env.execute(action)
    print(f"After {action.name}: {env}")

print(f"\nFinal Performance: {env.performance}")

EXERCISE 1: Agent-Environment Interaction

Initial state: [Dirty] Agent@A [Dirty] | Perf: 0
Current percept: (<Location.A: 'A'>, <Status.DIRTY: 'Dirty'>)

Executing actions manually:
After SUCK: [Clean] Agent@A [Dirty] | Perf: 10
After RIGHT: [Clean] Agent@B [Dirty] | Perf: 9
After SUCK: [Clean] Agent@B [Clean] | Perf: 19

Final Performance: 19


1. ## What is the difference between the agent's percept and the full environment state? Consider what information is hidden from the agent.
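    The full environment state includes the clean/dirty status of both squares as well as the agent's location. The percept contains only the agent's current location and the status of the square it is on, so the status of the other square is hidden from the agent.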


2. ## How does the performance measure influence what actions are "good"? What would/could happen if we changed the reward/cost values?
    If the agent cleans a dirty square, it is rewarded. If it moves, it incurs a small penalty. If the cost of moving were raised, especially above the reward for cleaning, the agent might never move at all.

In [3]:
"""
Exercise 2: Simple Reflex Agent (Rational Behavior)
"""

def simple_reflex_vacuum_agent(percept: Tuple[Location, Status]) -> Action:
    """
    Choose an action from the current percept alone (no memory of the past).

    Args:
        percept: (location, status) pair as returned by the environment.

    Returns:
        SUCK if the current square is dirty; otherwise RIGHT when standing
        on A, and LEFT for any other location.
    """
    where, state = percept

    # Dirt always takes priority over movement.
    if state == Status.DIRTY:
        return Action.SUCK

    # Current square is clean: head for the other square.
    return Action.RIGHT if where == Location.A else Action.LEFT

# Banner and a summary of the agent's condition-action rules
print("\n" + "=" * 70, "EXERCISE 2: Simple Reflex Agent Behavior", "=" * 70, sep="\n")
print(
    "\nAgent Rules:",
    "  - IF dirty THEN suck",
    "  - IF at A and clean THEN move right",
    "  - IF at B and clean THEN move left",
    "",
    sep="\n",
)

# Run the percept -> action -> execute loop for at most 8 steps,
# stopping early as soon as both squares are clean.
env = VacuumEnvironment()
for step in range(8):
    percept = env.percept()
    action = simple_reflex_vacuum_agent(percept)
    print(f"Step {step}: {env}")
    print(f"  Percept: {percept} → Action: {action.value}")
    env.execute(action)
    if not env.is_clean():
        continue
    print(f"\nStep {step+1}: {env}")
    print("✓ All locations clean!")
    break

print(
    f"\nFinal Performance Score: {env.performance}",
    f"Steps taken: {env.time_steps}",
    sep="\n",
)


EXERCISE 2: Simple Reflex Agent Behavior

Agent Rules:
  - IF dirty THEN suck
  - IF at A and clean THEN move right
  - IF at B and clean THEN move left

Step 0: [Dirty] Agent@A [Dirty] | Perf: 0
  Percept: (<Location.A: 'A'>, <Status.DIRTY: 'Dirty'>) → Action: Suck
Step 1: [Clean] Agent@A [Dirty] | Perf: 10
  Percept: (<Location.A: 'A'>, <Status.CLEAN: 'Clean'>) → Action: Right
Step 2: [Clean] Agent@B [Dirty] | Perf: 9
  Percept: (<Location.B: 'B'>, <Status.DIRTY: 'Dirty'>) → Action: Suck

Step 3: [Clean] Agent@B [Clean] | Perf: 19
✓ All locations clean!

Final Performance Score: 19
Steps taken: 3


1. ## In this environment, does the agent need memory to act rationally? Why or why not?
    No. It does not need to know whether the previous square was clean or dirty; it only needs to know whether the current one is clean or dirty.

2. ## Is this agent rational? Does it maximize expected performance given its percept sequence?
    Yes, it is rational, because given what it has perceived so far, it keeps cleaning and moving until both squares are clean.
    
3. ## What problem would this agent encounter in a larger environment (e.g., 10 locations)? Think about its rule structure.
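    It wouldn't know which locations are dirty until it had visited every one of them, paying the movement cost for each step. Its rules are also written specifically for locations A and B, so every additional location would need its own hand-written rule.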

4. ## Could the simple_reflex_vacuum_agent get stuck in an infinite loop? Under what circumstances?
    Yes, if the locations keep getting dirty while the agent is elsewhere.

In [4]:
"""
Exercise 3: Environment Properties - Stochasticity
"""

class StochasticVacuumEnvironment(VacuumEnvironment):
    """Vacuum environment in which SUCK on a dirty square may fail.

    Movement, scoring and step counting are inherited from
    ``VacuumEnvironment``; only the outcome of SUCK on a dirty square is
    random.

    Args:
        failure_rate: Probability in [0, 1] that a SUCK on a dirty square
            leaves it dirty. The default of 0.3 gives a 70% success rate.

    Raises:
        ValueError: If ``failure_rate`` is not within [0, 1].
    """

    def __init__(self, failure_rate: float = 0.3):
        # The chained comparison also rejects NaN.
        if not 0.0 <= failure_rate <= 1.0:
            raise ValueError(
                f"failure_rate must be between 0 and 1, got {failure_rate!r}"
            )
        super().__init__()
        self.failure_rate = failure_rate

    def execute(self, action: Action):
        """Execute ``action``; SUCK on a dirty square fails at ``failure_rate``.

        A failed SUCK still consumes one time step but changes neither the
        square nor the score, and prints a warning line. Every other case
        behaves exactly like ``VacuumEnvironment.execute``.
        """
        sucking_dirt = (
            action == Action.SUCK
            and self.locations[self.agent_location] == Status.DIRTY
        )
        # A random number is drawn only for SUCK on a dirty square, so a
        # seeded run consumes the random stream one draw per such attempt.
        if sucking_dirt and random.random() <= self.failure_rate:
            self.time_steps += 1
            print("    ⚠ SUCK action failed!")
            return

        # Successful SUCK, moves and everything else: reuse the parent's
        # deterministic logic instead of duplicating it here.
        super().execute(action)

# Banner and a description of the stochastic environment
print("\n" + "=" * 70, "EXERCISE 3: Stochastic Environment", "=" * 70, sep="\n")
print("\nEnvironment: SUCK action has 70% success rate", "", sep="\n")

random.seed(42)  # Fixed seed so the run is reproducible
env_stochastic = StochasticVacuumEnvironment()

# Same reflex agent as before, given up to 15 steps because SUCK may fail.
for step in range(15):
    percept = env_stochastic.percept()
    action = simple_reflex_vacuum_agent(percept)
    print(f"Step {step}: {env_stochastic}")
    print(f"  Action: {action.value}")
    env_stochastic.execute(action)
    if not env_stochastic.is_clean():
        continue
    print(f"\n✓ All locations clean!")
    break

print(
    f"\nFinal Performance: {env_stochastic.performance}",
    f"Total steps: {env_stochastic.time_steps}",
    sep="\n",
)


EXERCISE 3: Stochastic Environment

Environment: SUCK action has 70% success rate

Step 0: [Dirty] Agent@A [Dirty] | Perf: 0
  Action: Suck
Step 1: [Clean] Agent@A [Dirty] | Perf: 10
  Action: Right
Step 2: [Clean] Agent@B [Dirty] | Perf: 9
  Action: Suck
    ⚠ SUCK action failed!
Step 3: [Clean] Agent@B [Dirty] | Perf: 9
  Action: Suck
    ⚠ SUCK action failed!
Step 4: [Clean] Agent@B [Dirty] | Perf: 9
  Action: Suck
    ⚠ SUCK action failed!
Step 5: [Clean] Agent@B [Dirty] | Perf: 9
  Action: Suck

✓ All locations clean!

Final Performance: 19
Total steps: 6


1. ## Does the simple reflex agent behave rationally in the stochastic environment? Why or why not?
    Yes. The only difference here is that the suck action sometimes fails, and the agent just keeps retrying the suck action until it works.
2. ## What additional capability would help the agent handle stochasticity better? Think about typical failure logic.
    
3. ## Compare the performance scores: How much worse is performance in the stochastic environment?