In [1]:
import gymnasium as gym
import time
from warnings import filterwarnings
# Record the installed Gymnasium version alongside the notebook output.
print("gym.__version__:", gym.__version__)

gym.__version__: 0.29.1


In [2]:
# Shared FrozenLake environment used by the cells below.
# is_slippery=False: the chosen action is always the one executed.
env = gym.make(
    "FrozenLake-v1",
    desc=None,
    map_name="4x4",
    is_slippery=False,
    render_mode="human",
)

In [3]:
# Display labels indexed by action id; padded to equal width so log lines align.
ACTION_STRING_LIST = [
    " LEFT",  # action 0
    " DOWN",  # action 1
    "RIGHT",  # action 2
    "   UP",  # action 3
]

In [4]:
def _print_space_samples(space, n_samples):
    """Print ``n_samples`` random draws from ``space`` on a single line."""
    for _ in range(n_samples):
        print(space.sample(), end=" ")
    print()


def _step_and_report(observation, action):
    """Apply ``action`` to the module-level ``env`` and print the transition.

    Args:
        observation: State index the agent occupies before the step.
        action: Action index (0 LEFT, 1 DOWN, 2 RIGHT, 3 UP).

    Returns:
        The observation returned by ``env.step(action)``.
    """
    next_observation, reward, terminated, truncated, info = env.step(action)
    print("Obs.: {0}, Action: {1}({2}), Next Obs.: {3}, Reward: {4}, Terminated: {5}, Truncated: {6}, Info: {7}".format(
        observation, action, ACTION_STRING_LIST[action], next_observation, reward, terminated, truncated, info
    ))
    return next_observation


def env_info_details(n_samples=10, pause_seconds=3):
    """Print a short tour of the module-level ``env``.

    Shows the observation and action spaces (with a few random samples of
    each), then resets the environment and takes two fixed steps, RIGHT and
    then DOWN, printing every transition.

    Args:
        n_samples: How many random samples to print for each space.
        pause_seconds: Seconds to wait after each step; values <= 0 skip the
            wait. Presumably this gives time to watch the rendered window.

    Side effects:
        Resets and steps the shared ``env``, writes to stdout, and sleeps.
    """
    separator = "*" * 80

    #####################
    # observation space #
    #####################
    print(separator)
    print("[observation_space]")
    print(env.observation_space)
    print(env.observation_space.n)
    # The 4x4 map has 16 grid cells, so uniformly random samples
    # fall in the range 0 to 15 (inclusive).
    _print_space_samples(env.observation_space, n_samples)

    print(separator)
    ################
    # action space #
    ################
    print("[action_space]")
    print(env.action_space)
    print(env.action_space.n)
    # Uniformly random samples are one of 4 actions:
    #     1. LEFT: 0
    #     2. DOWN: 1
    #     3. RIGHT: 2
    #     4. UP: 3
    _print_space_samples(env.action_space, n_samples)

    print(separator)
    # This sets the initial state at S, our starting point.
    # The info dict returned by reset() is not used here.
    observation, _ = env.reset()

    # The environment is created with is_slippery=False, so the transition is
    # deterministic: choosing RIGHT moves right (info reports 'prob': 1.0).
    observation = _step_and_report(observation, 2)  # RIGHT

    if pause_seconds > 0:
        time.sleep(pause_seconds)

    _step_and_report(observation, 1)  # DOWN

    print(separator)
    if pause_seconds > 0:
        time.sleep(pause_seconds)

- env.observation_space : 4x4 map -> Discrete(16)
- env.observation_space.n : 16
- env.observation_space.sample() : picks a random observation, so it returns a random value from 0 to 15
- env.action_space : left, down, right, up -> Discrete(4)
- env.action_space.n : 4
- env.action_space.sample() : picks a random action, so it returns a random value from 0 to 3
- env.reset() : returns (observation, info), but we don't use info

#### observation

|  |  |  |  |
|--|--|--|--|  
| 0| 1| 2| 3|
| 4| 5| 6| 7|  
| 8| 9|10|11|  
|12|13|14|15|  

After calling env.reset(), we are located at observation 0.  
Then, after taking action 2 (RIGHT), we are located at next observation 1.

In [5]:
# Run the walkthrough only when this code is the entry point
# (the guard is true in this notebook, as the output below shows).
if __name__ == "__main__":
    env_info_details()

********************************************************************************
[observation_space]
Discrete(16)
16
5 9 8 5 13 8 15 1 6 11 
********************************************************************************
[action_space]
Discrete(4)
4
1 1 0 1 1 2 3 3 3 3 
********************************************************************************
Obs.: 0, Action: 2(RIGHT), Next Obs.: 1, Reward: 0.0, Terminated: False, Truncated: False, Info: {'prob': 1.0}
Obs.: 1, Action: 1( DOWN), Next Obs.: 5, Reward: 0.0, Terminated: True, Truncated: False, Info: {'prob': 1.0}
********************************************************************************
