In [1]:
import gymnasium as gym
from openaigym.gridworld import GridWorldEnv

# Register the environment under a namespaced id so we can create it with gym.make().
# entry_point is passed as the class object itself rather than a "module:Class" string.
gym.register(
    id="gymnasium_env/GridWorld-v0",
    entry_point=GridWorldEnv,
    max_episode_steps=300,  # Prevent infinite episodes (enforced by the TimeLimit wrapper gym.make() adds)
)

In [3]:
# Create the environment like any built-in environment.
# gym.make() hands back the env wrapped in helper wrappers rather than the bare
# GridWorldEnv; the repr displayed by the last line lists them outermost-first.
env = gym.make("gymnasium_env/GridWorld-v0")
env

<TimeLimit<OrderEnforcing<PassiveEnvChecker<GridWorldEnv<gymnasium_env/GridWorld-v0>>>>>

In [4]:
# Customize environment parameters: extra keyword arguments given to gym.make()
# (here `size`) are forwarded to the environment being constructed.
# Note this rebinds `env`, so later cells see the size=10 instance.
env = gym.make("gymnasium_env/GridWorld-v0", size=10)
# Attributes of the underlying env are read through .unwrapped, past the wrappers.
env.unwrapped.size

10

In [5]:
# Create multiple copies of the environment behind a single batched interface.
# NOTE: with these arguments make_vec() returns a SyncVectorEnv (see the repr),
# which steps its sub-environments one after another in this process; it batches
# the API but does not by itself run the environments in parallel.
vec_env = gym.make_vec("gymnasium_env/GridWorld-v0", num_envs=3)
vec_env

SyncVectorEnv(gymnasium_env/GridWorld-v0, num_envs=3)

In [7]:
from gymnasium.utils.env_checker import check_env

# This will catch many common issues (API conformance of reset/step, spaces, etc.).
# `env` here is whatever the previous cells left bound (the size=10 instance when
# the notebook is run top to bottom). The checker is given env.unwrapped so that it
# inspects the raw GridWorldEnv rather than the wrappers gym.make() put around it.
try:
    check_env(env.unwrapped)
    print("Environment passes all checks!")
except Exception as e:
    # Deliberately broad: any failure is reported as text instead of aborting the notebook.
    print(f"Environment has issues: {e}")

Environment passes all checks!


In [8]:
# Test specific action sequences to verify behavior
env = gym.make("gymnasium_env/GridWorld-v0")
obs, info = env.reset(seed=42)  # Use seed for reproducible testing

print(f"Starting position - Agent: {obs['agent']}, Target: {obs['target']}")

# Test each action type
actions = [0, 1, 2, 3]  # right, up, left, down
for action in actions:
    # Copy first: the observation array may be replaced/mutated by the next step
    old_pos = obs['agent'].copy()
    obs, reward, terminated, truncated, info = env.step(action)
    new_pos = obs['agent']
    print(f"Action {action}: {old_pos} -> {new_pos}, reward={reward}")
    # Stepping an episode that has already ended is undefined behavior in
    # Gymnasium (reset() must be called first), so stop the sequence here
    # if the agent happened to reach the target or the time limit was hit.
    if terminated or truncated:
        print(f"Episode ended after action {action} (terminated={terminated}, truncated={truncated})")
        break

Starting position - Agent: [0 3], Target: [3 2]
Action 0: [0 3] -> [1 3], reward=0
Action 1: [1 3] -> [1 4], reward=0
Action 2: [1 4] -> [0 4], reward=0
Action 3: [0 4] -> [0 3], reward=0


In [10]:
from gymnasium.wrappers import FlattenObservation

# Original observation is a dictionary: the space is a Dict with one Box per
# entry ('agent' and 'target'), as the repr displayed by the last line shows.
# A fresh default-size env is created here, replacing the one bound to `env` earlier.
env = gym.make('gymnasium_env/GridWorld-v0')
env.observation_space

Dict('agent': Box(0, 4, (2,), int64), 'target': Box(0, 4, (2,), int64))

In [11]:
# Reset without a seed: the agent/target positions are drawn at random, so the
# displayed observation will generally differ from run to run.
obs, info = env.reset()
obs

{'agent': array([3, 3]), 'target': array([1, 4])}

In [12]:
# Wrap it to flatten observations into a single array: the Dict of two
# 2-element Boxes becomes one Box holding all four values (see the repr).
# The wrapper shares the underlying `env`; it does not create a copy.
wrapped_env = FlattenObservation(env)
wrapped_env.observation_space

Box(0, 4, (4,), int64)

In [13]:
# Resetting through the wrapper starts a NEW unseeded episode, so these values
# are a fresh random layout and are not expected to match the dict observation
# displayed by the earlier env.reset() cell.
# Layout is presumably [agent_x, agent_y, target_x, target_y] (Dict entries
# concatenated in key order) - TODO confirm against the FlattenObservation docs.
obs, info = wrapped_env.reset()
obs

array([3, 3, 4, 2])