In [1]:
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.env_util import make_vec_env
import numpy as np

from uav_rl_navigation import config
from uav_rl_navigation.utils.env_helpers import make_flat_voyager
from uav_rl_navigation.environment.metrics_wrapper import UAVMetricsWrapper

ModuleNotFoundError: No module named 'uav_rl_navigation'

In [2]:
# analyse environment characteristics
def analyse_env(env):
    """Print one sampled transition and the wrapper chain of an environment.

    Handles two API shapes, distinguished by the presence of ``num_envs``:

    * vectorised env: ``reset()`` returns only the stacked observations and
      ``step()`` returns ``(obs, rewards, dones, infos)``;
    * single env: ``reset()`` returns ``(obs, info)`` and ``step()`` returns
      ``(obs, reward, terminated, truncated, info)``.

    Args:
        env: The environment to inspect. It must provide ``reset``, ``step``,
            ``observation_space`` and ``action_space``.

    Returns:
        None. Everything is reported via ``print``.

    Note:
        This resets the env and advances it by one random action.
    """
    reset_result = env.reset()
    if hasattr(env, "num_envs"):
        obs = reset_result  # VecEnv only returns obs
        # one independently sampled action per sub-env, stacked into a batch
        actions = np.stack([env.action_space.sample() for _ in range(env.num_envs)])
        # VecEnv.step returns (obs, rewards, dones, infos); only rewards are shown
        _, rews, _, _ = env.step(actions)
        print(
            f"VecEnv:\nobs space:\n{env.observation_space}\n"
            f"stacked obs sample:\n{obs}\n"
            f"action space:\n{env.action_space}\n"
            f"stacked actions sample:\n{actions}\n"
            f"stacked reward samples:\n{rews}\n"
        )
    else:
        obs, _ = reset_result  # single env returns (obs, info)
        action = env.action_space.sample()
        # single env step returns (obs, reward, terminated, truncated, info)
        _, rew, _, _, _ = env.step(action)
        print(
            f"Single Env:\nobs space:\n{env.observation_space}\n"
            f"obs sample:\n{obs}\n"
            f"action space:\n{env.action_space}\n"
            f"action sample:\n{action}\n"
            f"reward sample:\n{rew}\n"
        )

    # Pick the object whose wrappers get listed: the first sub-env when the
    # env exposes an `.envs` list, otherwise the env itself. Without this
    # fallback the single-env branch above would crash on `env.envs`.
    inner_env = env.envs[0] if hasattr(env, "envs") else env

    # Peel off wrappers by following `.env` until an object has no such attribute.
    chain = []
    current = inner_env
    while True:
        chain.append(type(current).__name__)
        # most wrappers keep the inner env in .env
        if not hasattr(current, "env"):
            break
        current = current.env

    print("Wrapper chain:", " -> ".join(chain))

# init and check env

In [4]:
# Check the flat PyFlyt env, wrapped in UAVMetricsWrapper, for Gymnasium and SB3 compatibility.
env = UAVMetricsWrapper(make_flat_voyager())
check_env(env, warn=True)

[A                             [Aargv[0]=

[A                             [A
argv[0]=
[A                             [A
argv[0]=




In [8]:
# Build a vectorised env with SB3 that holds a single flat-voyager instance.
# The factory kwargs are forwarded to make_flat_voyager for each sub-env.
env = make_vec_env(
    make_flat_voyager,
    n_envs=1,
    env_kwargs={"num_waypoints": 1, "with_metrics": False},
)
# a bare last expression shows the resulting VecEnv object as the cell output
env

<stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv at 0x301a9e960>

In [7]:
# Analyse the vectorised env: prints its spaces, one sampled batch of
# actions/rewards, and the wrapper chain of its first sub-env.
# Note: this resets `env` and advances it by one random step.
analyse_env(env)

[A                             [A
argv[0]=
VecEnv:
obs space:
Box(-inf, inf, (13,), float32)
stacked obs sample:
[[ 0.          0.          0.          0.         -0.13135155  0.99134576
   0.          0.          0.          0.         -1.0811628  -0.30549684
  -0.21590877]]
action space:
Box([-26.   -26.    -2.62  -8.  ], [26.   26.    2.62  8.  ], (4,), float32)
stacked actions sample:
[[-16.163414   20.410658    2.5783305   7.610661 ]]
stacked reward samples:
[0.24896625]

Wrapper chain: Monitor -> UAVMetricsWrapper -> FlattenVectorVoyagerEnv -> VectorVoyagerEnv -> Aviary


In [9]:
# Run the analysis again on whatever `env` is currently bound to.
# NOTE(review): the execution counts in this notebook are out of order
# (8, 7, 9), so this call may have inspected a different `env` than the
# earlier one — confirm with a fresh Restart & Run All.
analyse_env(env)

[A                             [A
argv[0]=
VecEnv:
obs space:
Box(-inf, inf, (13,), float32)
stacked obs sample:
[[ 0.          0.          0.          0.         -0.1434835   0.99134105
   0.          0.          0.          0.          0.05121566 -0.6430171
  -0.17290227]]
action space:
Box([-26.   -26.    -2.62  -8.  ], [26.   26.    2.62  8.  ], (4,), float32)
stacked actions sample:
[[ -1.0701265  -22.899519    -0.76422775   5.240437  ]]
stacked reward samples:
[0.49610892]

Wrapper chain: Monitor -> FlattenVectorVoyagerEnv -> VectorVoyagerEnv -> Aviary
