In [1]:
import numpy as np
from enum import Enum
import random

In [68]:
class Action(Enum):
    """Grid moves, each stored as an ``[dx, dy]`` offset added to an ``[x, y]`` state."""

    UP      = [ 0,  1]
    DOWN    = [ 0, -1]
    LEFT    = [-1,  0]
    RIGHT   = [ 1,  0]


class Environment:
    """Grid world with walls, a goal cell and a start cell.

    Coordinates are ``[x, y]`` pairs; both components must lie in
    ``MIN_COORD..MAX_COORD`` (inclusive) and the cell must not be listed in
    ``WALLS`` for the cell to be enterable.
    """

    def __init__(self) -> None:
        # Inclusive coordinate limits shared by both axes.
        self.MIN_COORD = 0
        self.MAX_COORD = 10

        # NOTE: [5, 5] is listed in both the first and the second row below.
        # The duplicate is harmless for membership tests and is kept so that
        # the contents of WALLS stay exactly as callers may already expect.
        self.WALLS = [
            [0, 5], [2, 5], [3, 5], [4, 5], [5, 5],
            [5, 0], [5, 2], [5, 3], [5, 4], [5, 5], [5, 6], [5, 7], [5, 9], [5, 10],
            [6, 4], [7, 4], [9, 4], [10, 4]
        ]

        self.GOAL = [10, 10]
        # Cell every action leads to once the goal has been reached.
        self.START = [0, 0]

    def _is_free(self, cell) -> bool:
        """Return True when *cell* is inside the grid and is not a wall."""
        inside = all(self.MIN_COORD <= c <= self.MAX_COORD for c in cell)
        return inside and cell not in self.WALLS

    @staticmethod
    def _shift(state, action):
        """Return the ``[x, y]`` list obtained by adding *action*'s offset to *state*."""
        return [coord + delta for coord, delta in zip(state, action.value)]

    def get_possible_states(self, state, action: Action):
        """Return the states that can follow *state* when *action* is taken.

        Args:
            state: Current ``[x, y]`` position. Any two-element sequence is
                accepted (list, tuple, ...); it is never mutated and the
                returned lists never alias it.
            action: The move the agent intends to make.

        Returns:
            A list of ``[x, y]`` lists. The first entry is the outcome of the
            intended move (the current state if that move is blocked by the
            grid edge or a wall). The remaining entries are the enterable
            cells reached by slipping perpendicular to the intended move, in
            the order UP, DOWN for horizontal actions and LEFT, RIGHT for
            vertical ones. If neither perpendicular cell is enterable, the
            current state is used as the single slip outcome. From the goal
            cell the only outcome is the start cell, whatever the action.
        """
        # Normalise to a fresh list so tuple inputs compare equal to the
        # list-valued GOAL/WALLS entries and results never share the caller's
        # object.
        state = list(state)

        if state == self.GOAL:
            return [list(self.START)]

        intended = self._shift(state, action)
        if not self._is_free(intended):
            intended = list(state)

        # Slips happen along the axis perpendicular to the intended move.
        if action in (Action.LEFT, Action.RIGHT):
            slip_actions = (Action.UP, Action.DOWN)
        else:
            slip_actions = (Action.LEFT, Action.RIGHT)

        slip_states = []
        for slip in slip_actions:
            candidate = self._shift(state, slip)
            if self._is_free(candidate):
                slip_states.append(candidate)

        if not slip_states:
            slip_states.append(list(state))

        return [intended] + slip_states

In [80]:
env = Environment()
dim_x = 10
dim_y = 10

# Print one row per (state, action, successor) triple: the successor s', the
# reward r (1 only when s' is the goal) and the probability attached to it.
print("   s\t  a\t    s'\t\t  r\tp(s', r|s, a)")
for x in range(dim_x + 1):
    for y in range(dim_y + 1):
        state = [x, y]

        # Wall cells are not states of the process, so they get no rows.
        if state in env.WALLS:
            continue

        for a in Action:
            successors = env.get_possible_states(state, a)

            # The first successor is the intended outcome; the rest are slips.
            if state == env.GOAL:
                probs = [1]
            elif len(successors) == 3:
                probs = [0.8, 0.1, 0.1]
            else:
                probs = [0.9, 0.1]

            for successor, probability in zip(successors, probs):
                r = 1 if successor == env.GOAL else 0
                print(f"{state}\t  {a.name}\t  {successor}\t  {r}\t  {probability}")

   s	  a	    s'		  r	p(s', r|s, a)
[0, 0]	  UP	  [0, 1]	  0	  0.9
[0, 0]	  UP	  [1, 0]	  0	  0.1
[0, 0]	  DOWN	  [0, 0]	  0	  0.9
[0, 0]	  DOWN	  [1, 0]	  0	  0.1
[0, 0]	  LEFT	  [0, 0]	  0	  0.9
[0, 0]	  LEFT	  [0, 1]	  0	  0.1
[0, 0]	  RIGHT	  [1, 0]	  0	  0.9
[0, 0]	  RIGHT	  [0, 1]	  0	  0.1
[0, 1]	  UP	  [0, 2]	  0	  0.9
[0, 1]	  UP	  [1, 1]	  0	  0.1
[0, 1]	  DOWN	  [0, 0]	  0	  0.9
[0, 1]	  DOWN	  [1, 1]	  0	  0.1
[0, 1]	  LEFT	  [0, 1]	  0	  0.8
[0, 1]	  LEFT	  [0, 2]	  0	  0.1
[0, 1]	  LEFT	  [0, 0]	  0	  0.1
[0, 1]	  RIGHT	  [1, 1]	  0	  0.8
[0, 1]	  RIGHT	  [0, 2]	  0	  0.1
[0, 1]	  RIGHT	  [0, 0]	  0	  0.1
[0, 2]	  UP	  [0, 3]	  0	  0.9
[0, 2]	  UP	  [1, 2]	  0	  0.1
[0, 2]	  DOWN	  [0, 1]	  0	  0.9
[0, 2]	  DOWN	  [1, 2]	  0	  0.1
[0, 2]	  LEFT	  [0, 2]	  0	  0.8
[0, 2]	  LEFT	  [0, 3]	  0	  0.1
[0, 2]	  LEFT	  [0, 1]	  0	  0.1
[0, 2]	  RIGHT	  [1, 2]	  0	  0.8
[0, 2]	  RIGHT	  [0, 3]	  0	  0.1
[0, 2]	  RIGHT	  [0, 1]	  0	  0.1
[0, 3]	  UP	  [0, 4]	  0	  0.9
[0, 3]	  UP	  