Example 1:
Simple Gridworld MDP

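This MDP models a simple grid in which an agent starts in one cell, moves up, down, left, or right one cell at a time (staying in place when a move would leave the grid), and receives a reward of 1.0 when it reaches the goal cell.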


In [None]:

import numpy as np

class GridWorldMDP:
    """Deterministic grid world with four moves and a rewarded goal cell.

    A state is an ``(x, y)`` tuple used directly as an index into
    ``reward_grid``: ``x`` is bounded by ``grid_size[0]`` and ``y`` by
    ``grid_size[1]``. Moves that would leave the grid keep the agent in
    place along that axis.
    """

    def __init__(self, grid_size, start_state, goal_state, reward_grid=None):
        """Create the environment and place the agent on ``start_state``.

        Args:
            grid_size: ``(n_x, n_y)`` extent of the grid.
            start_state: ``(x, y)`` tuple the agent starts from and returns
                to on ``reset()``.
            goal_state: ``(x, y)`` tuple that ends an episode; its reward is
                always set to 1.0.
            reward_grid: Optional array of per-cell rewards. When omitted,
                every cell other than the goal has reward 0.
        """
        self.grid_size = grid_size
        self.start_state = start_state
        self.goal_state = goal_state
        self.state = start_state
        if reward_grid is None:
            self.reward_grid = np.zeros(grid_size)
        else:
            # Take a private copy: the goal reward is written below, and
            # doing that on the caller's array would silently alter their
            # data (and leak between MDPs built from the same array).
            self.reward_grid = np.array(reward_grid)
        self.reward_grid[goal_state] = 1.0  # Goal state reward

    def get_possible_actions(self):
        """Return the list of action names accepted by ``take_action``."""
        return ['up', 'down', 'left', 'right']

    def take_action(self, action):
        """Apply ``action`` to the current state and report the outcome.

        An unrecognised action leaves the agent where it is; the reward of
        the current cell is still returned.

        Args:
            action: One of ``'up'``, ``'down'``, ``'left'``, ``'right'``.

        Returns:
            A ``(state, reward, done)`` tuple: the new state, the reward
            stored for that cell, and whether the new state is the goal.
        """
        x, y = self.state
        if action == 'up':
            self.state = (max(0, x - 1), y)
        elif action == 'down':
            self.state = (min(self.grid_size[0] - 1, x + 1), y)
        elif action == 'left':
            self.state = (x, max(0, y - 1))
        elif action == 'right':
            self.state = (x, min(self.grid_size[1] - 1, y + 1))

        reward = self.reward_grid[self.state]
        done = self.state == self.goal_state
        return self.state, reward, done

    def reset(self):
        """Move the agent back to the start state and return that state."""
        self.state = self.start_state
        return self.state

# Build a 4x4 grid world: start in cell (0, 0), goal in cell (3, 3).
mdp = GridWorldMDP(grid_size=(4, 4), start_state=(0, 0), goal_state=(3, 3))

# Roll out one episode under a uniformly random policy, logging every step
# and accumulating the reward until the goal is reached.
state = mdp.reset()
done, total_reward = False, 0

while True:
    action = np.random.choice(mdp.get_possible_actions())
    next_state, reward, done = mdp.take_action(action)
    total_reward += reward
    print(f"Action: {action}, New State: {next_state}, Reward: {reward}, Done: {done}")
    if done:
        break

print(f"Total Reward: {total_reward}")



Action: left, New State: (0, 0), Reward: 0.0, Done: False
Action: down, New State: (1, 0), Reward: 0.0, Done: False
Action: up, New State: (0, 0), Reward: 0.0, Done: False
Action: down, New State: (1, 0), Reward: 0.0, Done: False
Action: left, New State: (1, 0), Reward: 0.0, Done: False
Action: down, New State: (2, 0), Reward: 0.0, Done: False
Action: up, New State: (1, 0), Reward: 0.0, Done: False
Action: left, New State: (1, 0), Reward: 0.0, Done: False
Action: left, New State: (1, 0), Reward: 0.0, Done: False
Action: left, New State: (1, 0), Reward: 0.0, Done: False
Action: down, New State: (2, 0), Reward: 0.0, Done: False
Action: up, New State: (1, 0), Reward: 0.0, Done: False
Action: right, New State: (1, 1), Reward: 0.0, Done: False
Action: up, New State: (0, 1), Reward: 0.0, Done: False
Action: down, New State: (1, 1), Reward: 0.0, Done: False
Action: up, New State: (0, 1), Reward: 0.0, Done: False
Action: left, New State: (0, 0), Reward: 0.0, Done: False
Action: down, New State