[gym/common] Add angular momentum and support polygon stability reward.

duburcqa · Jun 10, 2024 · c08369b · c08369b
1 parent 71b8873
commit c08369b
Show file tree

Hide file tree

Showing 19 changed files with 839 additions and 172 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -22,15 +22,15 @@ generated-members = torch, jiminy
 [tool.pylint.basic]
 # Good variable names which should always be accepted, separated by a comma
 good-names =
-    i, j, k, l, N,                    # Python: for-loop indices
-    tb, np, nb, mp, tp,               # Python: classical modules
-    fd, _,                            # Python: contexte
-    t, q, v, x, u, s, qx, qy, qz, qw, # Physics: state, action
-    I, R, H, T, M, dt,                # Physics: dynamics
-    a, b, c, y, z, n, e,              # Maths / Algebra : variables
-    f, rg, lo, hi, op, fn,            # Maths / Algebra : operators
-    kp, kd, ki,                       # Control: Gains
-    ax                                # Matplotlib
+    i, j, k, l, N,                     # Python: for-loop indices
+    tb, np, nb, mp, tp,                # Python: classical modules
+    fd, _,                             # Python: context
+    t, q, v, x, u, s, qx, qy, qz, qw,  # Physics: state, action
+    I, R, H, T, M, dt,                 # Physics: dynamics
+    A, a, b, c, y, z, n, e,            # Maths / Algebra : variables
+    f, rg, lo, hi, op, fn,             # Maths / Algebra : operators
+    kp, kd, ki,                        # Control: Gains
+    ax                                 # Matplotlib
 
 [tool.pylint.format]
 # Regexp for a line that is allowed to be longer than the limit

diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/interfaces.py b/python/gym_jiminy/common/gym_jiminy/common/bases/interfaces.py
@@ -354,11 +354,11 @@ def stop(self) -> None:
         self.simulator.stop()
 
     @property
+    @abstractmethod
     def unwrapped(self) -> "BaseJiminyEnv":
         """The "underlying environment at the basis of the pipeline from which
         this environment is part of.
         """
-        return self
 
     @property
     @abstractmethod

diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/__init__.py
@@ -1,6 +1,7 @@
 # pylint: disable=missing-module-docstring
 
-from .mixin import (radial_basis_function,
+from .mixin import (CUTOFF_ESP,
+                    radial_basis_function,
                     AdditiveMixtureReward,
                     MultiplicativeMixtureReward)
 from .generic import (BaseTrackingReward,
@@ -14,6 +15,7 @@
                          MinimizeAngularMomentumReward)
 
 __all__ = [
+    "CUTOFF_ESP",
     "radial_basis_function",
     "AdditiveMixtureReward",
     "MultiplicativeMixtureReward",

diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/generic.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/generic.py
@@ -54,9 +54,10 @@ class BaseTrackingReward(BaseQuantityReward):
     otherwise an exception will be raised. See `DatasetTrajectoryQuantity` and
     `AbstractQuantity` documentations for details.
 
-    The error transform in a normalized reward to maximize by applying RBF
+    The error is converted into a normalized reward to maximize by applying RBF
     kernel on the error. The reward will be 1.0 if the error cancels out
-    completely and less than 0.01 above the user-specified cutoff threshold.
+    completely and less than 'CUTOFF_ESP' above the user-specified cutoff
+    threshold.
     """
     def __init__(self,
                  env: InterfaceJiminyEnv,

diff --git a/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py b/python/gym_jiminy/common/gym_jiminy/common/compositions/mixin.py
@@ -13,7 +13,7 @@
 
 
 # Reward value at cutoff threshold
-RBF_CUTOFF_ESP = 1.0e-2
+CUTOFF_ESP = 1.0e-2
 
 
 ArrayOrScalar = Union[np.ndarray, float]
@@ -51,7 +51,7 @@ def radial_basis_function(error: ArrayOrScalar,
         squared_dist_rel = np.dot(error_, error_) / math.pow(cutoff, 2)
     else:
         squared_dist_rel = math.pow(np.linalg.norm(error_, order) / cutoff, 2)
-    return math.pow(RBF_CUTOFF_ESP, squared_dist_rel)
+    return math.pow(CUTOFF_ESP, squared_dist_rel)
 
 
 class AdditiveMixtureReward(BaseMixtureReward):

diff --git a/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py b/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py
@@ -566,6 +566,10 @@ def step_dt(self) -> float:
     def is_training(self) -> bool:
         return self._is_training
 
+    @property
+    def unwrapped(self) -> "BaseJiminyEnv":
+        """The underlying environment at the basis of the pipeline, which is
+        this environment itself.
+        """
+        return self
+
     def train(self) -> None:
         self._is_training = True
 

diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py
@@ -26,7 +26,8 @@
                          MultiFootRelativeXYZQuat,
                          CenterOfMass,
                          CapturePoint,
-                         ZeroMomentPoint)
+                         ZeroMomentPoint,
+                         translate_position_odom)
 
 
 __all__ = [
@@ -57,4 +58,5 @@
     'CenterOfMass',
     'CapturePoint',
     'ZeroMomentPoint',
+    'translate_position_odom'
 ]
diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py
@@ -230,6 +230,9 @@ def initialize(self) -> None:
         # Re-allocate memory as the number of frames is not known in advance.
         # Note that Fortran memory layout (column-major) is used for speed up
         # because it preserves contiguity when copying frame data.
+        # Anyway, C memory layout (row-major) does not make sense in this case
+        # since chunks of columns are systematically extracted, which means
+        # that the returned array would NEVER be contiguous.
         nframes = len(self.frame_names)
         self._rot_mat_batch = np.zeros((3, 3, nframes), order='F')
 
@@ -387,9 +390,9 @@ def initialize(self) -> None:
         # Re-allocate memory as the number of frames is not known in advance
         nframes = len(self.frame_names)
         if self.type in (OrientationType.EULER, OrientationType.ANGLE_AXIS):
-            self._data_batch = np.zeros((3, nframes), order='C')
+            self._data_batch = np.zeros((3, nframes), order='F')
         elif self.type == OrientationType.QUATERNION:
-            self._data_batch = np.zeros((4, nframes), order='C')
+            self._data_batch = np.zeros((4, nframes), order='F')
 
         # Re-assign mapping from chunks of frame names to corresponding data
         if self.type is not OrientationType.MATRIX:
@@ -640,7 +643,7 @@ def initialize(self) -> None:
 
         # Re-allocate memory as the number of frames is not known in advance
         nframes = len(self.frame_names)
-        self._pos_batch = np.zeros((3, nframes), order='C')
+        self._pos_batch = np.zeros((3, nframes), order='F')
 
         # Refresh proxies
         self._pos_views.clear()
@@ -908,7 +911,7 @@ def __init__(self,
             auto_refresh=False)
 
         # Pre-allocate memory for storing the pose XYZQuat of all frames
-        self._xyzquats = np.zeros((7, len(frame_names)), order='C')
+        self._xyzquats = np.zeros((7, len(frame_names)), order='F')
 
     def refresh(self) -> np.ndarray:
         # Copy the position of all frames at once in contiguous buffer

diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/atlas.py b/python/gym_jiminy/envs/gym_jiminy/envs/atlas.py
@@ -18,7 +18,7 @@
                                       PDAdapter,
                                       MahonyFilter)
 from gym_jiminy.common.utils import build_pipeline
-from gym_jiminy.toolbox.math import ConvexHull
+from gym_jiminy.toolbox.math import ConvexHull2D
 
 if sys.version_info < (3, 9):
     from importlib_resources import files
@@ -100,18 +100,19 @@
 def _cleanup_contact_points(env: WalkerJiminyEnv) -> None:
     contact_frame_indices = env.robot.contact_frame_indices
     contact_frame_names = env.robot.contact_frame_names
-    num_contacts = int(len(env.robot.contact_frame_indices) // 2)
+    num_contacts = len(env.robot.contact_frame_indices) // 2
     for contact_slice in (slice(num_contacts), slice(num_contacts, None)):
         contact_positions = np.stack([
             env.robot.pinocchio_data.oMf[frame_index].translation
-            for frame_index in contact_frame_indices[contact_slice]], axis=0)
+            for frame_index in contact_frame_indices[contact_slice]
+            ], axis=0)
         contact_bottom_index = np.argsort(
-            contact_positions[:, 2])[:int(num_contacts//2)]
-        convex_hull = ConvexHull(contact_positions[contact_bottom_index, :2])
+            contact_positions[:, 2])[:(num_contacts // 2)]
+        convex_hull = ConvexHull2D(contact_positions[contact_bottom_index, :2])
         env.robot.remove_contact_points([
             contact_frame_names[contact_slice][i]
             for i in set(range(num_contacts)).difference(
-                contact_bottom_index[convex_hull._vertex_indices])])
+                contact_bottom_index[convex_hull.indices])])
 
 
 class AtlasJiminyEnv(WalkerJiminyEnv):

diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/compositions/__init__.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/compositions/__init__.py
@@ -0,0 +1,8 @@
+# pylint: disable=missing-module-docstring
+
+from .locomotion import tanh_normalization, MaximizeStability
+
+__all__ = [
+    "tanh_normalization",
+    "MaximizeStability"
+]
diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/compositions/locomotion.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/compositions/locomotion.py
@@ -0,0 +1,102 @@
+"""Rewards mainly relevant for locomotion tasks on floating-base robots.
+"""
+import math
+from functools import partial
+
+import numba as nb
+
+from gym_jiminy.common.compositions import CUTOFF_ESP
+from gym_jiminy.common.bases import (
+    InterfaceJiminyEnv, QuantityEvalMode, BaseQuantityReward)
+
+from ..quantities import StabilityMarginProjectedSupportPolygon
+
+
+@nb.jit(nopython=True, cache=True)
+def tanh_normalization(value: float,
+                       cutoff_low: float,
+                       cutoff_high: float) -> float:
+    """Normalize a given quantity between 0.0 and 1.0.
+
+    The output tends to 1.0 beyond the lower cutoff and to 0.0 beyond the
+    upper cutoff, regardless of which one is the largest. These extrema are
+    only reached asymptotically, which means that the gradient is never zero
+    but rather vanishes exponentially. The gradient will be steeper if the
+    cutoff range is tighter and the other way around. The two cutoffs must
+    differ since their difference is used as divisor.
+
+    :param value: Value of the scalar floating-point quantity. The quantity may
+                  be bounded or unbounded, and signed or not, without
+                  restrictions.
+    :param cutoff_low: Value at which the output is '1.0 - CUTOFF_ESP'.
+    :param cutoff_high: Value at which the output is 'CUTOFF_ESP'.
+    """
+    value_rel = (
+        cutoff_high + cutoff_low - 2 * value) / (cutoff_high - cutoff_low)
+    return 1.0 / (1.0 + math.pow(CUTOFF_ESP / (1.0 - CUTOFF_ESP), value_rel))
+
+
+class MaximizeStability(BaseQuantityReward):
+    """Encourage the agent to maintain itself in postures as robust as possible
+    to external disturbances.
+
+    The signed distance is transformed into a normalized reward to maximize by
+    applying rescaled tanh. The reward is smaller than CUTOFF_ESP if the ZMP is
+    outside the projected support polygon and further away from the border than
+    'cutoff_outer'. Conversely, the reward is larger than 1.0 - CUTOFF_ESP if
+    the ZMP is inside the projected support polygon and further away from the
+    border than 'cutoff_inner'.
+
+    The agent may opt for one of the two very different strategies to maximize
+    this reward:
+      * Foot placement: reshaping the projected support polygon by moving the
+        feet (aka the candidate contact points) in the direction of the ZMP
+        without actually moving the ZMP itself.
+      * Torso/Ankle control: Modulating the linear and angular momentum of its
+        upper-body to move the ZMP closer to the Chebyshev center of the
+        projected support polygon while holding the feet at the exact same
+        location.
+
+    These two strategies are complementary rather than mutually exclusive.
+    Usually, ankle control is preferred for small disturbances. Foot placement
+    comes into play when ankle control is no longer sufficient to keep balance.
+    Indeed, ankle control is only capable of recovering 0-step capturable
+    disturbances, while foot placement is only limited to inf-step capturable
+    disturbances, which includes and dramatically extends 0-step capturability.
+    """
+    def __init__(self,
+                 env: InterfaceJiminyEnv,
+                 cutoff_inner: float,
+                 cutoff_outer: float) -> None:
+        """
+        :param env: Base or wrapped jiminy environment.
+        :param cutoff_inner: Cutoff threshold when the ZMP lies inside the
+                             support polygon. The reward will be larger than
+                             '1.0 - CUTOFF_ESP' if the distance from the border
+                             is larger than 'cutoff_inner'.
+        :param cutoff_outer: Cutoff threshold when the ZMP lies outside the
+                             support polygon. The reward will be smaller than
+                             'CUTOFF_ESP' if the ZMP is further away from the
+                             border of the support polygon than 'cutoff_outer'.
+
+        :raises ValueError: If any of the cutoff thresholds is negative, or if
+                            both of them are zero.
+        """
+        # Backup some user argument(s)
+        self.cutoff_inner = cutoff_inner
+        self.cutoff_outer = cutoff_outer
+
+        # The cutoff thresholds must be positive
+        if self.cutoff_inner < 0.0 or self.cutoff_outer < 0.0:
+            raise ValueError(
+                "The inner and outer cutoff must both be positive.")
+
+        # At least one cutoff must be non-zero, otherwise the normalization
+        # range '[-cutoff_outer, cutoff_inner]' is empty and computing the
+        # reward would divide by zero.
+        if self.cutoff_inner + self.cutoff_outer <= 0.0:
+            raise ValueError(
+                "The inner and outer cutoff must not both be zero.")
+
+        # Call base implementation.
+        # The reward is 'CUTOFF_ESP' at signed distance '-cutoff_outer' and
+        # '1.0 - CUTOFF_ESP' at signed distance 'cutoff_inner'.
+        super().__init__(
+            env,
+            "reward_stability",
+            (StabilityMarginProjectedSupportPolygon, dict(
+                mode=QuantityEvalMode.TRUE
+            )),
+            partial(tanh_normalization,
+                    cutoff_low=self.cutoff_inner,
+                    cutoff_high=-self.cutoff_outer),
+            is_normalized=True,
+            is_terminal=False)
diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/__init__.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/__init__.py
@@ -1,10 +1,10 @@
 # pylint: disable=missing-module-docstring
 
-from .qhull import ConvexHull, compute_distance_convex_to_point
+from .qhull import ConvexHull2D, compute_convex_chebyshev_center
 
 __all__ = [
-    "ConvexHull",
-    "compute_distance_convex_to_point"
+    "ConvexHull2D",
+    "compute_convex_chebyshev_center"
 ]
 
 try: