Tuning parameters.

duburcqa · Aug 26, 2023 · 504c6dd · 504c6dd
1 parent 16a5154
commit 504c6dd
Show file tree

Hide file tree

Showing 10 changed files with 61 additions and 80 deletions.
diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py
@@ -103,8 +103,7 @@ def __init__(self,
                  update_ratio: int = 1,
                  exact_init: bool = True,
                  kp: Union[np.ndarray, float] = 1.0,
-                 ki: Union[np.ndarray, float] = 0.1,
-                 **kwargs: Any) -> None:
+                 ki: Union[np.ndarray, float] = 0.1) -> None:
         """
         :param name: Name of the block.
         :param env: Environment to connect with.
@@ -116,8 +115,6 @@ def __init__(self,
                            free-falling at init, which is not realistic anyway.
         :param mahony_kp: Proportional gain used for gyro-accel sensor fusion.
         :param mahony_ki: Integral gain used for gyro bias estimate.
-        :param kwargs: Used arguments to allow automatic pipeline wrapper
-                       generation.
         """
         # Handling of default argument(s)
         num_imu_sensors = len(env.robot.sensors_names[imu.type])

diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/motor_safety_limit.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/motor_safety_limit.py
@@ -1,7 +1,7 @@
 """Implementation of basic Proportional-Derivative controller block compatible
 with gym_jiminy reinforcement learning pipeline environment design.
 """
-from typing import Any, List, Optional
+from typing import List
 
 import numpy as np
 import numba as nb
@@ -43,7 +43,6 @@ def apply_safety_limits(command: np.ndarray,
     return np.clip(command, safe_effort_lower, safe_effort_upper)
 
 
-
 class MotorSafetyLimit(
         BaseControllerBlock[np.ndarray, np.ndarray, BaseObsT, np.ndarray]):
     """Safety mechanism designed to avoid damaging the hardware.
@@ -74,7 +73,7 @@ def __init__(self,
                  kp: float,
                  kd: float,
                  soft_position_margin: float = 0.0,
-                 **kwargs: Any) -> None:
+                 soft_velocity_max: float = float("inf")) -> None:
         """
         :param name: Name of the block.
         :param env: Environment to connect with.
@@ -84,8 +83,8 @@ def __init__(self,
         :param soft_position_margin: Minimum distance of the current motor
                                      positions from their respective bounds
                                      before starting to break.
-        :param kwargs: Used arguments to allow automatic pipeline wrapper
-                       generation.
+        :param soft_velocity_max: Maximum velocity of the motor before
+                                  starting to break.
         """
         # Make sure the action space of the environment has not been altered
         if env.action_space is not env.unwrapped.action_space:
@@ -96,18 +95,19 @@ def __init__(self,
         # Backup some user argument(s)
         self.kp = kp
         self.kd = kd
-        self.soft_position_margin = soft_position_margin
 
         # Define buffers storing information about the motors for efficiency
         motors_position_idx: List[int] = sum(env.robot.motors_position_idx, [])
-        self.motors_soft_position_lower = env.robot.position_limit_lower[
-            motors_position_idx] + self.soft_position_margin
-        self.motors_soft_position_upper = env.robot.position_limit_upper[
-            motors_position_idx] - self.soft_position_margin
-        self.motors_velocity_limit = env.robot.velocity_limit[
-            env.robot.motors_velocity_idx]
+        self.motors_position_lower = env.robot.position_limit_lower[
+            motors_position_idx] + soft_position_margin
+        self.motors_position_upper = env.robot.position_limit_upper[
+            motors_position_idx] - soft_position_margin
+        self.motors_velocity_limit = np.minimum(env.robot.velocity_limit[
+            env.robot.motors_velocity_idx], soft_velocity_max)
         self.motors_effort_limit = env.robot.command_limit[
             env.robot.motors_velocity_idx]
+        self.motors_effort_limit[
+            self.motors_position_lower > self.motors_position_upper] = 0.0
 
         # Extract measured motor positions and velocities for fast access
         self.q_measured, self.v_measured = env.sensors_data[encoder.type]
@@ -160,7 +160,7 @@ def compute_command(self, action: np.ndarray) -> np.ndarray:
                                    v_measured,
                                    self.kp,
                                    self.kd,
-                                   self.motors_soft_position_lower,
-                                   self.motors_soft_position_upper,
+                                   self.motors_position_lower,
+                                   self.motors_position_upper,
                                    self.motors_velocity_limit,
                                    self.motors_effort_limit)
diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py
@@ -2,7 +2,7 @@
 with gym_jiminy reinforcement learning pipeline environment design.
 """
 import math
-from typing import Any, List, Union
+from typing import List, Union
 
 import numpy as np
 import numba as nb
@@ -212,8 +212,7 @@ def __init__(self,
                  kp: Union[float, List[float], np.ndarray],
                  kd: Union[float, List[float], np.ndarray],
                  target_position_margin: float = 0.0,
-                 target_velocity_limit: float = float("inf"),
-                 **kwargs: Any) -> None:
+                 target_velocity_limit: float = float("inf")) -> None:
         """
         :param name: Name of the block.
         :param env: Environment to connect with.
@@ -225,8 +224,6 @@ def __init__(self,
         :param target_position_margin: Minimum distance of the motor target
                                        positions from their respective bounds.
         :param target_velocity_limit: Maximum motor target velocities.
-        :param kwargs: Used arguments to allow automatic pipeline wrapper
-                       generation.
         """
         # Make sure that the specified derivative order is valid
         assert (0 < order < 4), "Derivative order of command out-of-bounds"

diff --git a/python/gym_jiminy/common/gym_jiminy/common/envs/env_locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/envs/env_locomotion.py
@@ -56,8 +56,8 @@
     imu.type:  np.array([0.01, 0.01, 0.01, 0.02, 0.02, 0.02, 0.0, 0.0, 0.0])
 }
 
-DEFAULT_SIMULATION_DURATION = 20.0  # (s) Default simulation duration
-DEFAULT_STEP_DT = 0.001             # (s) Stepper update period
+DEFAULT_SIMULATION_DURATION = 30.0  # (s) Default simulation duration
+DEFAULT_STEP_DT = 0.04              # (s) Stepper update period
 
 DEFAULT_HLC_TO_LLC_RATIO = 1  # (NA)
 

diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/anymal.py b/python/gym_jiminy/envs/gym_jiminy/envs/anymal.py
@@ -4,8 +4,6 @@
 import sys
 from typing import Any
 
-import numpy as np
-
 from gym_jiminy.common.envs import WalkerJiminyEnv
 from gym_jiminy.common.blocks import PDController, MahonyFilter
 from gym_jiminy.common.pipeline import build_pipeline
@@ -25,11 +23,11 @@
 STEP_DT = 0.04
 
 # PID proportional gains (one per actuated joint)
-PD_KP = np.array([1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0,
-                  1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0])
+PD_KP = (1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0,
+         1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0)
 # PID derivative gains (one per actuated joint)
-PD_KD = np.array([0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
-                  0.01, 0.01, 0.01, 0.01, 0.01, 0.01])
+PD_KD = (0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
+         0.01, 0.01, 0.01, 0.01, 0.01, 0.01)
 
 # Mahony filter proportional and derivative gains
 MAHONY_KP = 1.0
@@ -39,7 +37,7 @@
 REWARD_MIXTURE = {
     'direction': 0.0,
     'energy': 0.0,
-    'done': 1.0
+    'survival': 1.0
 }
 # Standard deviation ratio of each individual origin of randomness
 STD_RATIO = {

diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/atlas.py b/python/gym_jiminy/envs/gym_jiminy/envs/atlas.py
@@ -11,7 +11,9 @@
 from pinocchio import neutral, buildReducedModel
 
 from gym_jiminy.common.envs import WalkerJiminyEnv
-from gym_jiminy.common.blocks import MotorSafetyLimit, PDController, MahonyFilter
+from gym_jiminy.common.blocks import (MotorSafetyLimit,
+                                      PDController,
+                                      MahonyFilter)
 from gym_jiminy.common.pipeline import build_pipeline
 from gym_jiminy.toolbox.math import ConvexHull
 
@@ -34,22 +36,21 @@
 # Stepper update period (:float [s])
 STEP_DT = 0.04
 
-MOTOR_SAFETY_KP = 50.0
-MOTOR_SAFETY_KD = 4.0
+MOTOR_VELOCITY_MAX = 3.0
 
 # PID proportional gains (one per actuated joint)
-PD_REDUCED_KP = np.array([
+PD_REDUCED_KP = (
     # Left leg: [HpX, HpZ, HpY, KnY, AkY, AkX]
     5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0,
     # Right leg: [HpX, HpZ, HpY, KnY, AkY, AkX]
-    5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0])
-PD_REDUCED_KD = np.array([
+    5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0)
+PD_REDUCED_KD = (
     # Left leg: [HpX, HpZ, HpY, KnY, AkY, AkX]
     0.02, 0.01, 0.015, 0.01, 0.015, 0.01,
     # Right leg: [HpX, HpZ, HpY, KnY, AkY, AkX]
-    0.02, 0.01, 0.015, 0.01, 0.015, 0.01])
+    0.02, 0.01, 0.015, 0.01, 0.015, 0.01)
 
-PD_FULL_KP = np.array([
+PD_FULL_KP = (
     # Neck: [Y]
     100.0,
     # Back: [Z, Y, X]
@@ -59,8 +60,8 @@
     # Right arm: [ShZ, ShX, ElY, ElX, WrY, WrX, WrY2]
     500.0, 100.0, 200.0, 500.0, 10.0, 100.0, 10.0,
     # Lower body motors
-    *PD_REDUCED_KP])
-PD_FULL_KD = np.array([
+    *PD_REDUCED_KP)
+PD_FULL_KD = (
     # Neck: [Y]
     0.01,
     # Back: [Z, Y, X]
@@ -70,7 +71,7 @@
     # Right arm: [ShZ, ShX, ElY, ElX, WrY, WrX, WrY2]
     0.01, 0.01, 0.01, 0.02, 0.01, 0.02, 0.02,
     # Lower body motors
-    *PD_REDUCED_KD])
+    *PD_REDUCED_KD)
 
 # Mahony filter proportional and derivative gains
 # See: https://cas.mines-paristech.fr/~petit/papers/ral22/main.pdf
@@ -81,7 +82,7 @@
 REWARD_MIXTURE = {
     'direction': 0.0,
     'energy': 0.0,
-    'done': 1.0
+    'survival': 1.0
 }
 # Standard deviation ratio of each individual origin of randomness
 STD_RATIO = {
@@ -239,16 +240,6 @@ def joint_position_idx(joint_name: str) -> int:
         cls=AtlasJiminyEnv
     ),
     layers_config=[
-        dict(
-            block=dict(
-                cls=MotorSafetyLimit,
-                kwargs=dict(
-                    kp=MOTOR_SAFETY_KP,
-                    kd=MOTOR_SAFETY_KD,
-                    soft_position_margin=0.0
-                )
-            ),
-        ),
         dict(
             block=dict(
                 cls=PDController,
@@ -258,7 +249,7 @@ def joint_position_idx(joint_name: str) -> int:
                     kp=PD_FULL_KP,
                     kd=PD_FULL_KD,
                     target_position_margin=0.0,
-                    target_velocity_limit=float("inf")
+                    target_velocity_limit=MOTOR_VELOCITY_MAX,
                 )
             ),
             wrapper=dict(
@@ -284,16 +275,6 @@ def joint_position_idx(joint_name: str) -> int:
         cls=AtlasReducedJiminyEnv
     ),
     layers_config=[
-        dict(
-            block=dict(
-                cls=MotorSafetyLimit,
-                kwargs=dict(
-                    kp=MOTOR_SAFETY_KP,
-                    kd=MOTOR_SAFETY_KD,
-                    soft_position_margin=0.0
-                )
-            ),
-        ),
         dict(
             block=dict(
                 cls=PDController,
@@ -303,7 +284,7 @@ def joint_position_idx(joint_name: str) -> int:
                     kp=PD_REDUCED_KP,
                     kd=PD_REDUCED_KD,
                     target_position_margin=0.0,
-                    target_velocity_limit=float("inf")
+                    target_velocity_limit=MOTOR_VELOCITY_MAX,
                 )
             ),
             wrapper=dict(

diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/cassie.py b/python/gym_jiminy/envs/gym_jiminy/envs/cassie.py
@@ -41,11 +41,11 @@
 STEP_DT = 0.04
 
 # PID proportional gains (one per actuated joint)
-PD_KP = np.array([50.0, 50.0, 50.0, 80.0, 8.0,
-                  50.0, 50.0, 50.0, 80.0, 8.0])
+PD_KP = (50.0, 50.0, 50.0, 80.0, 8.0,
+         50.0, 50.0, 50.0, 80.0, 8.0)
 # PID derivative gains (one per actuated joint)
-PD_KD = np.array([0.01, 0.02, 0.02, 0.03, 0.02,
-                  0.01, 0.02, 0.02, 0.03, 0.02])
+PD_KD = (0.01, 0.02, 0.02, 0.03, 0.02,
+         0.01, 0.02, 0.02, 0.03, 0.02)
 
 # Mahony filter proportional and derivative gains
 MAHONY_KP = 1.0
@@ -55,7 +55,7 @@
 REWARD_MIXTURE = {
     'direction': 0.0,
     'energy': 0.0,
-    'done': 1.0
+    'survival': 1.0
 }
 # Standard deviation ratio of each individual origin of randomness
 STD_RATIO = {

diff --git a/python/gym_jiminy/examples/reinforcement_learning/torchrl/acrobot_ppo.py b/python/gym_jiminy/examples/reinforcement_learning/torchrl/acrobot_ppo.py
@@ -56,6 +56,9 @@
 
 
 if __name__ == '__main__':
+    # Fix weird issue with multiprocessing
+    __spec__ = None
+
     # Enforce seed for most common libraries
     os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
     random.seed(SEED)

diff --git a/python/gym_jiminy/unit_py/test_pipeline_control.py b/python/gym_jiminy/unit_py/test_pipeline_control.py
@@ -162,19 +162,20 @@ def test_pid_controller(self):
         assert isinstance(controller, PDController) and controller.order == 1
         ctrl_name = controller.name
         n_motors = len(controller.fieldnames)
-        p = env.log_data["variables"][".".join((
+        pos = env.log_data["variables"][".".join((
             "HighLevelController", ctrl_name, "state", str(n_motors - 1)))]
-        v = env.log_data["variables"][".".join((
+        vel = env.log_data["variables"][".".join((
             "HighLevelController", ctrl_name, controller.fieldnames[-1]))]
 
         # Make sure that the position and velocity targets are consistent
         self.assertTrue(np.allclose(
-            np.diff(p) / controller.control_dt, v[1:], atol=TOLERANCE))
+            np.diff(pos) / controller.control_dt, vel[1:], atol=TOLERANCE))
 
-        # Make sure that the position and velocity targets are within bounds
+        # Make sure that the position targets are within bounds.
+        # No such guarantee can be provided for higher-order derivatives.
         robot = env.robot
         p_min = robot.position_limit_lower[robot.motors_position_idx[-1]]
         p_max = robot.position_limit_upper[robot.motors_position_idx[-1]]
         self.assertTrue(np.all(np.logical_and(p_min < p, p < p_max)))
-        v_max = robot.velocity_limit[robot.motors_velocity_idx[-1]]
-        self.assertTrue(np.all(np.logical_and(-v_max < v, v < v_max)))
+        # v_max = robot.velocity_limit[robot.motors_velocity_idx[-1]]
+        # self.assertTrue(np.all(np.logical_and(-v_max < v, v < v_max)))
diff --git a/python/gym_jiminy/unit_py/test_pipeline_design.py b/python/gym_jiminy/unit_py/test_pipeline_design.py
@@ -10,6 +10,8 @@
 from gym_jiminy.common.bases import JiminyEnvInterface
 
 
+TOLERANCE = 1.0e-6
+
 class PipelineDesign(unittest.TestCase):
     """ TODO: Write documentation
     """
@@ -155,8 +157,10 @@ def test_step_state(self):
 
         # Observation stacking is skipping the required number of frames
         stack_dt = (self.skip_frames_ratio + 1) * env.observe_dt
-        for i in range(3):
-            self.assertEqual(obs['t'][i], i * stack_dt)
+        t_obs_last = env.step_dt - env.step_dt % stack_dt
+        for i in range(self.num_stack):
+            self.assertTrue(np.isclose(
+                obs['t'][::-1][i], t_obs_last - i * stack_dt, TOLERANCE))
 
         # Initial observation is consistent with internal simulator state
         controller_target_obs = obs['actions']['controller_0']
@@ -175,7 +179,7 @@ def test_step_state(self):
             obs, *_ = env.step(action)
         for i, t in enumerate(np.flip(obs['t'])):
             self.assertTrue(np.isclose(
-                t, n_steps_breakpoint * env.step_dt - i * stack_dt, 1.0e-6))
+                t, n_steps_breakpoint * env.step_dt - i * stack_dt, TOLERANCE))
         imu_data_ref = env.simulator.robot.sensors_data['ImuSensor']
         imu_data_obs = obs['measurements']['ImuSensor'][-1]
         self.assertTrue(np.all(imu_data_ref == imu_data_obs))