Example 2:
MDP for Robot Navigation (Stochastic)

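This MDP represents a robot navigating through a grid with stochastic outcomes: with probability `action_fail_prob`, the chosen action "fails" and is replaced by an action drawn uniformly at random from the four moves (which may happen to be the intended one).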


In [None]:
import random

class StochasticGridMDP:
    """Grid-navigation MDP in which a chosen action can be randomly replaced.

    States are ``(x, y)`` tuples. 'up'/'down' decrease/increase ``x`` within
    ``[0, grid_size[0] - 1]``; 'left'/'right' decrease/increase ``y`` within
    ``[0, grid_size[1] - 1]``. Moves that would leave the grid are clamped,
    so the agent stays on the boundary cell.
    """

    # Kept in this order because random.choice samples from the list returned
    # by get_possible_actions().
    _ACTIONS = ('up', 'down', 'left', 'right')

    def __init__(self, grid_size, start_state, goal_state, action_fail_prob=0.2):
        """Store the grid configuration and place the agent on ``start_state``.

        Args:
            grid_size: pair giving the extent of the first and second coordinate.
            start_state: ``(x, y)`` state the agent begins in and resets to.
            goal_state: ``(x, y)`` state that ends the episode.
            action_fail_prob: probability that a requested action is replaced
                by a randomly chosen one.
        """
        self.grid_size = grid_size
        self.start_state = start_state
        self.goal_state = goal_state
        self.action_fail_prob = action_fail_prob
        self.state = start_state

    def get_possible_actions(self):
        """Return a fresh list with the four action names."""
        return list(self._ACTIONS)

    def _next_position(self, action):
        """Return the state reached from the current one by ``action``.

        An action name that is not one of the four moves leaves the state
        unchanged.
        """
        row, col = self.state
        if action == 'up':
            return (max(0, row - 1), col)
        if action == 'down':
            return (min(self.grid_size[0] - 1, row + 1), col)
        if action == 'left':
            return (row, max(0, col - 1))
        if action == 'right':
            return (row, min(self.grid_size[1] - 1, col + 1))
        return self.state

    def take_action(self, action):
        """Apply ``action`` (possibly replaced by a random one) and score it.

        Returns:
            ``(state, reward, done)`` where ``reward`` is 1 on the goal state
            and -0.1 otherwise, and ``done`` tells whether the goal was reached.
        """
        # A "failed" action is swapped for a uniformly random one, which may
        # coincide with the action that was requested.
        failed = random.random() < self.action_fail_prob
        if failed:
            action = random.choice(self.get_possible_actions())

        self.state = self._next_position(action)

        at_goal = self.state == self.goal_state
        if at_goal:
            return self.state, 1, at_goal
        return self.state, -0.1, at_goal

    def reset(self):
        """Move the agent back to the start state and return that state."""
        self.state = self.start_state
        return self.state

# Initialize the MDP: a 5x5 grid, starting at (0, 0) with the goal at (4, 4),
# where each chosen action has a 30% chance of being replaced by a random one.
stochastic_mdp = StochasticGridMDP(grid_size=(5, 5), start_state=(0, 0), goal_state=(4, 4), action_fail_prob=0.3)

# Example run: follow a uniformly random policy until the goal is reached.
state = stochastic_mdp.reset()
done = False
total_reward = 0

while not done:
    # Sample with the stdlib `random` module imported in this cell; the
    # original `np.random.choice` relied on NumPy, which is never imported here.
    action = random.choice(stochastic_mdp.get_possible_actions())
    next_state, reward, done = stochastic_mdp.take_action(action)
    total_reward += reward
    print(f"Action: {action}, New State: {next_state}, Reward: {reward}, Done: {done}")

print(f"Total Reward: {total_reward}")

