chore: Add a fifth action that controls the scaling of the propeller actions
iwishiwasaneagle · Feb 22, 2024 · 17a6e79 · 17a6e79
1 parent 675947e
commit 17a6e79
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/docs/examples/drl_hover_square_error.py b/docs/examples/drl_hover_square_error.py
@@ -39,7 +39,9 @@ def __init__(self, *args, **kwargs):
             low=low, high=high, dtype=self.observation_space.dtype
         )
         self.action_space = gymnasium.spaces.Box(
-            low=np.zeros(4), high=np.full(4, 1.0), dtype=self.action_space.dtype
+            low=np.array((0, 0, 0, 0, 1)),
+            high=np.array((1, 1, 1, 1, 1000)),
+            dtype=self.action_space.dtype,
         )
 
     def reset(
@@ -52,6 +54,7 @@ def reset(
         return super().reset(seed=seed, options=options)
 
     def step(self, action: PropellerAction) -> Tuple[State, float, bool, bool, dict]:
+        action = action[:4] * action[4]
         obs, _, trunc, term, info = super().step(action)
         distance_from_tgt = np.linalg.norm(self.hover_tgt - self.state.pos)
         reward = -np.square(distance_from_tgt)
@@ -61,6 +64,7 @@ def step(self, action: PropellerAction) -> Tuple[State, float, bool, bool, dict]
 
 def make_env():
     env = HoverMSERewardWrapperNonlinearDynamicModelDroneEnv()
+    env = gymnasium.wrappers.NormalizeObservation(env)
     env = TimeLimit(env, int(10 / env.dt))
     env = Monitor(env)
     return env