diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py index dfeb2ee9aa..2dc38a0628 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py +++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py @@ -103,8 +103,7 @@ def __init__(self, update_ratio: int = 1, exact_init: bool = True, kp: Union[np.ndarray, float] = 1.0, - ki: Union[np.ndarray, float] = 0.1, - **kwargs: Any) -> None: + ki: Union[np.ndarray, float] = 0.1) -> None: """ :param name: Name of the block. :param env: Environment to connect with. @@ -116,8 +115,6 @@ def __init__(self, free-falling at init, which is not realistic anyway. :param mahony_kp: Proportional gain used for gyro-accel sensor fusion. :param mahony_ki: Integral gain used for gyro bias estimate. - :param kwargs: Used arguments to allow automatic pipeline wrapper - generation. """ # Handling of default argument(s) num_imu_sensors = len(env.robot.sensors_names[imu.type]) diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/motor_safety_limit.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/motor_safety_limit.py index 208a9580fc..a7b1501719 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/blocks/motor_safety_limit.py +++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/motor_safety_limit.py @@ -1,7 +1,7 @@ """Implementation of basic Proportional-Derivative controller block compatible with gym_jiminy reinforcement learning pipeline environment design. """ -from typing import Any, List, Optional +from typing import List import numpy as np import numba as nb @@ -43,7 +43,6 @@ def apply_safety_limits(command: np.ndarray, return np.clip(command, safe_effort_lower, safe_effort_upper) - class MotorSafetyLimit( BaseControllerBlock[np.ndarray, np.ndarray, BaseObsT, np.ndarray]): """Safety mechanism designed to avoid damaging the hardware. @@ -74,7 +73,7 @@ def __init__(self, kp: float, kd: float, soft_position_margin: float = 0.0, - **kwargs: Any) -> None: + soft_velocity_max: float = float("inf")) -> None: """ :param name: Name of the block. :param env: Environment to connect with. @@ -84,8 +83,8 @@ def __init__(self, :param soft_position_margin: Minimum distance of the current motor positions from their respective bounds before starting to break. - :param kwargs: Used arguments to allow automatic pipeline wrapper - generation. + :param soft_velocity_max: Maximum velocity of the motor before + starting to break. """ # Make sure the action space of the environment has not been altered if env.action_space is not env.unwrapped.action_space: @@ -96,18 +95,19 @@ def __init__(self, # Backup some user argument(s) self.kp = kp self.kd = kd - self.soft_position_margin = soft_position_margin # Define buffers storing information about the motors for efficiency motors_position_idx: List[int] = sum(env.robot.motors_position_idx, []) - self.motors_soft_position_lower = env.robot.position_limit_lower[ - motors_position_idx] + self.soft_position_margin - self.motors_soft_position_upper = env.robot.position_limit_upper[ - motors_position_idx] - self.soft_position_margin - self.motors_velocity_limit = env.robot.velocity_limit[ - env.robot.motors_velocity_idx] + self.motors_position_lower = env.robot.position_limit_lower[ + motors_position_idx] + soft_position_margin + self.motors_position_upper = env.robot.position_limit_upper[ + motors_position_idx] - soft_position_margin + self.motors_velocity_limit = np.minimum(env.robot.velocity_limit[ + env.robot.motors_velocity_idx], soft_velocity_max) self.motors_effort_limit = env.robot.command_limit[ env.robot.motors_velocity_idx] + self.motors_effort_limit[ + self.motors_position_lower > self.motors_position_upper] = 0.0 # Extract measured motor positions and velocities for fast access self.q_measured, self.v_measured = env.sensors_data[encoder.type] @@ -160,7 +160,7 @@ def compute_command(self, action: np.ndarray) -> np.ndarray: v_measured, self.kp, self.kd, - self.motors_soft_position_lower, - self.motors_soft_position_upper, + self.motors_position_lower, + self.motors_position_upper, self.motors_velocity_limit, self.motors_effort_limit) diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py index d1c8399a6c..f108090c7d 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py +++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py @@ -2,7 +2,7 @@ with gym_jiminy reinforcement learning pipeline environment design. """ import math -from typing import Any, List, Union +from typing import List, Union import numpy as np import numba as nb @@ -212,8 +212,7 @@ def __init__(self, kp: Union[float, List[float], np.ndarray], kd: Union[float, List[float], np.ndarray], target_position_margin: float = 0.0, - target_velocity_limit: float = float("inf"), - **kwargs: Any) -> None: + target_velocity_limit: float = float("inf")) -> None: """ :param name: Name of the block. :param env: Environment to connect with. @@ -225,8 +224,6 @@ def __init__(self, :param target_position_margin: Minimum distance of the motor target positions from their respective bounds. :param target_velocity_limit: Maximum motor target velocities. - :param kwargs: Used arguments to allow automatic pipeline wrapper - generation. """ # Make sure that the specified derivative order is valid assert (0 < order < 4), "Derivative order of command out-of-bounds" diff --git a/python/gym_jiminy/common/gym_jiminy/common/envs/env_locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/envs/env_locomotion.py index 6e0deadd84..d7fc854cc0 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/envs/env_locomotion.py +++ b/python/gym_jiminy/common/gym_jiminy/common/envs/env_locomotion.py @@ -56,8 +56,8 @@ imu.type: np.array([0.01, 0.01, 0.01, 0.02, 0.02, 0.02, 0.0, 0.0, 0.0]) } -DEFAULT_SIMULATION_DURATION = 20.0 # (s) Default simulation duration -DEFAULT_STEP_DT = 0.001 # (s) Stepper update period +DEFAULT_SIMULATION_DURATION = 30.0 # (s) Default simulation duration +DEFAULT_STEP_DT = 0.04 # (s) Stepper update period DEFAULT_HLC_TO_LLC_RATIO = 1 # (NA) diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/anymal.py b/python/gym_jiminy/envs/gym_jiminy/envs/anymal.py index e2b82b6abc..ca54c00cbd 100644 --- a/python/gym_jiminy/envs/gym_jiminy/envs/anymal.py +++ b/python/gym_jiminy/envs/gym_jiminy/envs/anymal.py @@ -4,8 +4,6 @@ import sys from typing import Any -import numpy as np - from gym_jiminy.common.envs import WalkerJiminyEnv from gym_jiminy.common.blocks import PDController, MahonyFilter from gym_jiminy.common.pipeline import build_pipeline @@ -25,11 +23,11 @@ STEP_DT = 0.04 # PID proportional gains (one per actuated joint) -PD_KP = np.array([1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0, - 1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0]) +PD_KP = (1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0, + 1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0) # PID derivative gains (one per actuated joint) -PD_KD = np.array([0.01, 0.01, 0.01, 0.01, 0.01, 0.01, - 0.01, 0.01, 0.01, 0.01, 0.01, 0.01]) +PD_KD = (0.01, 0.01, 0.01, 0.01, 0.01, 0.01, + 0.01, 0.01, 0.01, 0.01, 0.01, 0.01) # Mahony filter proportional and derivative gains MAHONY_KP = 1.0 @@ -39,7 +37,7 @@ REWARD_MIXTURE = { 'direction': 0.0, 'energy': 0.0, - 'done': 1.0 + 'survival': 1.0 } # Standard deviation ratio of each individual origin of randomness STD_RATIO = { diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/atlas.py b/python/gym_jiminy/envs/gym_jiminy/envs/atlas.py index 084ac83df8..f6db2a6892 100644 --- a/python/gym_jiminy/envs/gym_jiminy/envs/atlas.py +++ b/python/gym_jiminy/envs/gym_jiminy/envs/atlas.py @@ -11,7 +11,9 @@ from pinocchio import neutral, buildReducedModel from gym_jiminy.common.envs import WalkerJiminyEnv -from gym_jiminy.common.blocks import MotorSafetyLimit, PDController, MahonyFilter +from gym_jiminy.common.blocks import (MotorSafetyLimit, + PDController, + MahonyFilter) from gym_jiminy.common.pipeline import build_pipeline from gym_jiminy.toolbox.math import ConvexHull @@ -34,22 +36,21 @@ # Stepper update period (:float [s]) STEP_DT = 0.04 -MOTOR_SAFETY_KP = 50.0 -MOTOR_SAFETY_KD = 4.0 +MOTOR_VELOCITY_MAX = 3.0 # PID proportional gains (one per actuated joint) -PD_REDUCED_KP = np.array([ +PD_REDUCED_KP = ( # Left leg: [HpX, HpZ, HpY, KnY, AkY, AkX] 5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0, # Right leg: [HpX, HpZ, HpY, KnY, AkY, AkX] - 5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0]) -PD_REDUCED_KD = np.array([ + 5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0) +PD_REDUCED_KD = ( # Left leg: [HpX, HpZ, HpY, KnY, AkY, AkX] 0.02, 0.01, 0.015, 0.01, 0.015, 0.01, # Right leg: [HpX, HpZ, HpY, KnY, AkY, AkX] - 0.02, 0.01, 0.015, 0.01, 0.015, 0.01]) + 0.02, 0.01, 0.015, 0.01, 0.015, 0.01) -PD_FULL_KP = np.array([ +PD_FULL_KP = ( # Neck: [Y] 100.0, # Back: [Z, Y, X] @@ -59,8 +60,8 @@ # Right arm: [ShZ, ShX, ElY, ElX, WrY, WrX, WrY2] 500.0, 100.0, 200.0, 500.0, 10.0, 100.0, 10.0, # Lower body motors - *PD_REDUCED_KP]) -PD_FULL_KD = np.array([ + *PD_REDUCED_KP) +PD_FULL_KD = ( # Neck: [Y] 0.01, # Back: [Z, Y, X] @@ -70,7 +71,7 @@ # Right arm: [ShZ, ShX, ElY, ElX, WrY, WrX, WrY2] 0.01, 0.01, 0.01, 0.02, 0.01, 0.02, 0.02, # Lower body motors - *PD_REDUCED_KD]) + *PD_REDUCED_KD) # Mahony filter proportional and derivative gains # See: https://cas.mines-paristech.fr/~petit/papers/ral22/main.pdf @@ -81,7 +82,7 @@ REWARD_MIXTURE = { 'direction': 0.0, 'energy': 0.0, - 'done': 1.0 + 'survival': 1.0 } # Standard deviation ratio of each individual origin of randomness STD_RATIO = { @@ -239,16 +240,6 @@ def joint_position_idx(joint_name: str) -> int: cls=AtlasJiminyEnv ), layers_config=[ - dict( - block=dict( - cls=MotorSafetyLimit, - kwargs=dict( - kp=MOTOR_SAFETY_KP, - kd=MOTOR_SAFETY_KD, - soft_position_margin=0.0 - ) - ), - ), dict( block=dict( cls=PDController, @@ -258,7 +249,7 @@ def joint_position_idx(joint_name: str) -> int: kp=PD_FULL_KP, kd=PD_FULL_KD, target_position_margin=0.0, - target_velocity_limit=float("inf") + target_velocity_limit=MOTOR_VELOCITY_MAX, ) ), wrapper=dict( @@ -284,16 +275,6 @@ def joint_position_idx(joint_name: str) -> int: cls=AtlasReducedJiminyEnv ), layers_config=[ - dict( - block=dict( - cls=MotorSafetyLimit, - kwargs=dict( - kp=MOTOR_SAFETY_KP, - kd=MOTOR_SAFETY_KD, - soft_position_margin=0.0 - ) - ), - ), dict( block=dict( cls=PDController, @@ -303,7 +284,7 @@ def joint_position_idx(joint_name: str) -> int: kp=PD_REDUCED_KP, kd=PD_REDUCED_KD, target_position_margin=0.0, - target_velocity_limit=float("inf") + target_velocity_limit=MOTOR_VELOCITY_MAX, ) ), wrapper=dict( diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/cassie.py b/python/gym_jiminy/envs/gym_jiminy/envs/cassie.py index 1915f32300..018af349a3 100644 --- a/python/gym_jiminy/envs/gym_jiminy/envs/cassie.py +++ b/python/gym_jiminy/envs/gym_jiminy/envs/cassie.py @@ -41,11 +41,11 @@ STEP_DT = 0.04 # PID proportional gains (one per actuated joint) -PD_KP = np.array([50.0, 50.0, 50.0, 80.0, 8.0, - 50.0, 50.0, 50.0, 80.0, 8.0]) +PD_KP = (50.0, 50.0, 50.0, 80.0, 8.0, + 50.0, 50.0, 50.0, 80.0, 8.0) # PID derivative gains (one per actuated joint) -PD_KD = np.array([0.01, 0.02, 0.02, 0.03, 0.02, - 0.01, 0.02, 0.02, 0.03, 0.02]) +PD_KD = (0.01, 0.02, 0.02, 0.03, 0.02, + 0.01, 0.02, 0.02, 0.03, 0.02) # Mahony filter proportional and derivative gains MAHONY_KP = 1.0 @@ -55,7 +55,7 @@ REWARD_MIXTURE = { 'direction': 0.0, 'energy': 0.0, - 'done': 1.0 + 'survival': 1.0 } # Standard deviation ratio of each individual origin of randomness STD_RATIO = { diff --git a/python/gym_jiminy/examples/reinforcement_learning/torchrl/acrobot_ppo.py b/python/gym_jiminy/examples/reinforcement_learning/torchrl/acrobot_ppo.py index c95224b190..4794d3b70a 100644 --- a/python/gym_jiminy/examples/reinforcement_learning/torchrl/acrobot_ppo.py +++ b/python/gym_jiminy/examples/reinforcement_learning/torchrl/acrobot_ppo.py @@ -56,6 +56,9 @@ if __name__ == '__main__': + # Fix weird issue with multiprocessing + __spec__ = None + # Enforce seed for most common libraries os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" random.seed(SEED) diff --git a/python/gym_jiminy/unit_py/test_pipeline_control.py b/python/gym_jiminy/unit_py/test_pipeline_control.py index a05d20042f..faaf1f7d8e 100644 --- a/python/gym_jiminy/unit_py/test_pipeline_control.py +++ b/python/gym_jiminy/unit_py/test_pipeline_control.py @@ -162,19 +162,20 @@ def test_pid_controller(self): assert isinstance(controller, PDController) and controller.order == 1 ctrl_name = controller.name n_motors = len(controller.fieldnames) - p = env.log_data["variables"][".".join(( + pos = env.log_data["variables"][".".join(( "HighLevelController", ctrl_name, "state", str(n_motors - 1)))] - v = env.log_data["variables"][".".join(( + vel = env.log_data["variables"][".".join(( "HighLevelController", ctrl_name, controller.fieldnames[-1]))] # Make sure that the position and velocity targets are consistent self.assertTrue(np.allclose( - np.diff(p) / controller.control_dt, v[1:], atol=TOLERANCE)) + np.diff(pos) / controller.control_dt, vel[1:], atol=TOLERANCE)) - # Make sure that the position and velocity targets are within bounds + # Make sure that the position targets are within bounds. + # No such guarantee can be provided for higher-order derivatives. robot = env.robot p_min = robot.position_limit_lower[robot.motors_position_idx[-1]] p_max = robot.position_limit_upper[robot.motors_position_idx[-1]] self.assertTrue(np.all(np.logical_and(p_min < p, p < p_max))) - v_max = robot.velocity_limit[robot.motors_velocity_idx[-1]] - self.assertTrue(np.all(np.logical_and(-v_max < v, v < v_max))) + # v_max = robot.velocity_limit[robot.motors_velocity_idx[-1]] + # self.assertTrue(np.all(np.logical_and(-v_max < v, v < v_max))) diff --git a/python/gym_jiminy/unit_py/test_pipeline_design.py b/python/gym_jiminy/unit_py/test_pipeline_design.py index 9157f2a5c7..7e911d247c 100644 --- a/python/gym_jiminy/unit_py/test_pipeline_design.py +++ b/python/gym_jiminy/unit_py/test_pipeline_design.py @@ -10,6 +10,8 @@ from gym_jiminy.common.bases import JiminyEnvInterface +TOLERANCE = 1.0e-6 + class PipelineDesign(unittest.TestCase): """ TODO: Write documentation """ @@ -155,8 +157,10 @@ def test_step_state(self): # Observation stacking is skipping the required number of frames stack_dt = (self.skip_frames_ratio + 1) * env.observe_dt - for i in range(3): - self.assertEqual(obs['t'][i], i * stack_dt) + t_obs_last = env.step_dt - env.step_dt % stack_dt + for i in range(self.num_stack): + self.assertTrue(np.isclose( + obs['t'][::-1][i], t_obs_last - i * stack_dt, TOLERANCE)) # Initial observation is consistent with internal simulator state controller_target_obs = obs['actions']['controller_0'] @@ -175,7 +179,7 @@ def test_step_state(self): obs, *_ = env.step(action) for i, t in enumerate(np.flip(obs['t'])): self.assertTrue(np.isclose( - t, n_steps_breakpoint * env.step_dt - i * stack_dt, 1.0e-6)) + t, n_steps_breakpoint * env.step_dt - i * stack_dt, TOLERANCE)) imu_data_ref = env.simulator.robot.sensors_data['ImuSensor'] imu_data_obs = obs['measurements']['ImuSensor'][-1] self.assertTrue(np.all(imu_data_ref == imu_data_obs))