# In [1]:
import numpy as np

class GridWorldPOMDP:
    """Rectangular grid world with a goal cell and impassable obstacle cells.

    Positions are ``(x, y)`` tuples with ``0 <= x < width`` and
    ``0 <= y < height``. The agent always starts at ``(0, 0)``.

    Args:
        width: Number of columns in the grid.
        height: Number of rows in the grid.
        goal: ``(x, y)`` cell that ends the episode when reached.
        obstacles: Iterable of ``(x, y)`` cells the agent cannot enter.
    """

    # Action index -> (dx, dy) displacement.
    ACTION_DELTAS = {
        0: (0, 1),   # up
        1: (0, -1),  # down
        2: (-1, 0),  # left
        3: (1, 0),   # right
    }

    def __init__(self, width, height, goal, obstacles):
        self.width = width
        self.height = height
        self.goal = goal
        self.obstacles = obstacles
        self.agent_pos = (0, 0)

    def reset(self):
        """Move the agent back to ``(0, 0)`` and return the first observation."""
        self.agent_pos = (0, 0)
        return self.get_observation()

    def get_observation(self):
        """Return the observation for the current state.

        The observation is currently the agent's exact ``(x, y)`` position.
        """
        return self.agent_pos

    def step(self, action):
        """Apply one action and return ``(observation, reward, done)``.

        Args:
            action: Either an integer action index (0 = up, 1 = down,
                2 = left, 3 = right; NumPy integer scalars are accepted) or
                an explicit ``(dx, dy)`` displacement.

        Returns:
            A tuple ``(observation, reward, done)`` where ``reward`` is always
            ``-1`` and ``done`` is true once the agent stands on the goal.

        Raises:
            ValueError: If ``action`` is an integer outside ``0..3``.
        """
        # np.integer is checked explicitly: NumPy integer scalars are not
        # instances of the builtin ``int``.
        if isinstance(action, (int, np.integer)):
            delta = self.ACTION_DELTAS.get(int(action))
            if delta is None:
                raise ValueError(
                    f"invalid action index {action!r}; expected one of "
                    f"{sorted(self.ACTION_DELTAS)}"
                )
            dx, dy = delta
        else:  # Assume action is already a (dx, dy) tuple
            dx, dy = action

        x, y = self.agent_pos
        # Clamp to the grid so moves into a wall leave the agent at the edge.
        new_x = min(max(x + dx, 0), self.width - 1)
        new_y = min(max(y + dy, 0), self.height - 1)
        target = (new_x, new_y)

        # A move into an obstacle cell is rejected: the agent stays in place
        # but still pays the step cost.
        blocked = any(tuple(cell) == target for cell in self.obstacles or ())
        if not blocked:
            self.agent_pos = target

        reward = -1  # Small negative reward for each step
        done = self.agent_pos == self.goal  # Episode ends when goal is reached
        return self.get_observation(), reward, done

class ParticleFilter:
    """Belief state over grid cells, represented as a list of ``(x, y)`` particles.

    Args:
        width: Exclusive upper bound for sampled x coordinates.
        height: Exclusive upper bound for sampled y coordinates.
        num_particles: Number of particles drawn at construction time.
    """

    def __init__(self, width, height, num_particles):
        self.width, self.height = width, height
        self.num_particles = num_particles
        self.particles = self.initialize_particles()

    def initialize_particles(self):
        """Return ``num_particles`` cells sampled uniformly from the grid.

        For each particle the x coordinate is drawn before the y coordinate,
        both from NumPy's global random state.
        """
        return [
            (
                np.random.randint(0, self.width),
                np.random.randint(0, self.height),
            )
            for _ in range(self.num_particles)
        ]

    def update_belief_state(self, observation, action):
        """Placeholder belief update.

        ``observation`` and ``action`` are currently ignored; the particle
        list is only shuffled in place, so the set of particles is unchanged.
        """
        np.random.shuffle(self.particles)

    def estimate_state(self):
        """Return the most frequent particle (the mode of the belief).

        Ties are resolved in favour of the particle that appears first in
        ``self.particles``.
        """
        tally = {}
        for cell in self.particles:
            if cell in tally:
                tally[cell] += 1
            else:
                tally[cell] = 1
        # max() keeps the first maximal entry, and dicts preserve insertion
        # order, which gives the tie-breaking rule described above.
        most_common_cell, _ = max(tally.items(), key=lambda entry: entry[1])
        return most_common_cell

class POMDPAgent:
    """Agent that holds a belief state and a log of what it has seen and done.

    Args:
        belief_state: Object representing the agent's belief; stored as-is.
    """

    def __init__(self, belief_state):
        self.history = []
        self.belief_state = belief_state

    def update_history(self, observation, action):
        """Append the ``(observation, action)`` pair to ``self.history``."""
        record = (observation, action)
        self.history.append(record)

    def choose_action(self):
        """Return a uniformly random action index in ``0..3``.

        The belief state and history are not consulted yet; the four indices
        stand for up, down, left and right.
        """
        num_actions = 4
        return np.random.randint(0, num_actions)

# Main loop: run a random-policy agent in the grid world and print, for every
# step, the particle set, the estimated state and the observed transition.
if __name__ == "__main__":
    # Define grid world environment
    width = 5
    height = 5
    goal = (4, 4)
    obstacles = [(1, 1), (2, 2), (3, 3)]
    env = GridWorldPOMDP(width, height, goal, obstacles)

    # Initialize belief state with particle filter
    num_particles = 100
    belief_state = ParticleFilter(width, height, num_particles)

    # Create POMDP agent with belief state
    agent = POMDPAgent(belief_state)

    # Training loop
    num_episodes = 100
    for episode in range(num_episodes):
        observation = env.reset()
        done = False
        action = None  # No action has been taken before the first observation

        while not done:
            # Record the latest observation together with the action that
            # preceded it (None on the first step of an episode)
            agent.update_history(observation, action)

            # Update belief state with observation and action history
            belief_state.update_belief_state(observation, action)

            # Estimate current state based on belief state
            estimated_state = belief_state.estimate_state()

            # Choose action based on belief state and history
            action = agent.choose_action()

            # Take action and observe next state, reward, and new observation
            next_observation, reward, done = env.step(action)

            # Print belief state and estimated state
            print("Belief state:", belief_state.particles)
            print("Estimated state:", estimated_state)

            # Print agent movement
            print("Agent movement:", observation, "->", next_observation)

            # Advance to the new observation; without this every iteration
            # would keep reporting and recording the reset observation.
            observation = next_observation


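# Captured notebook output (likely from an earlier version of this cell, since
# `action` is assigned before its first use in the loop above -- verify):
# NameError: name 'action' is not defined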