Skip to content

Commit

Permalink
Tuning parameters.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexis Duburcq committed Aug 26, 2023
1 parent 16a5154 commit 504c6dd
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ def __init__(self,
update_ratio: int = 1,
exact_init: bool = True,
kp: Union[np.ndarray, float] = 1.0,
ki: Union[np.ndarray, float] = 0.1,
**kwargs: Any) -> None:
ki: Union[np.ndarray, float] = 0.1) -> None:
"""
:param name: Name of the block.
:param env: Environment to connect with.
Expand All @@ -116,8 +115,6 @@ def __init__(self,
free-falling at init, which is not realistic anyway.
:param mahony_kp: Proportional gain used for gyro-accel sensor fusion.
:param mahony_ki: Integral gain used for gyro bias estimate.
:param kwargs: Used arguments to allow automatic pipeline wrapper
generation.
"""
# Handling of default argument(s)
num_imu_sensors = len(env.robot.sensors_names[imu.type])
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Implementation of basic Proportional-Derivative controller block compatible
with gym_jiminy reinforcement learning pipeline environment design.
"""
from typing import Any, List, Optional
from typing import List

import numpy as np
import numba as nb
Expand Down Expand Up @@ -43,7 +43,6 @@ def apply_safety_limits(command: np.ndarray,
return np.clip(command, safe_effort_lower, safe_effort_upper)



class MotorSafetyLimit(
BaseControllerBlock[np.ndarray, np.ndarray, BaseObsT, np.ndarray]):
"""Safety mechanism designed to avoid damaging the hardware.
Expand Down Expand Up @@ -74,7 +73,7 @@ def __init__(self,
kp: float,
kd: float,
soft_position_margin: float = 0.0,
**kwargs: Any) -> None:
soft_velocity_max: float = float("inf")) -> None:
"""
:param name: Name of the block.
:param env: Environment to connect with.
Expand All @@ -84,8 +83,8 @@ def __init__(self,
:param soft_position_margin: Minimum distance of the current motor
positions from their respective bounds
before starting to break.
:param kwargs: Used arguments to allow automatic pipeline wrapper
generation.
:param soft_velocity_max: Maximum velocity of the motor before
starting to break.
"""
# Make sure the action space of the environment has not been altered
if env.action_space is not env.unwrapped.action_space:
Expand All @@ -96,18 +95,19 @@ def __init__(self,
# Backup some user argument(s)
self.kp = kp
self.kd = kd
self.soft_position_margin = soft_position_margin

# Define buffers storing information about the motors for efficiency
motors_position_idx: List[int] = sum(env.robot.motors_position_idx, [])
self.motors_soft_position_lower = env.robot.position_limit_lower[
motors_position_idx] + self.soft_position_margin
self.motors_soft_position_upper = env.robot.position_limit_upper[
motors_position_idx] - self.soft_position_margin
self.motors_velocity_limit = env.robot.velocity_limit[
env.robot.motors_velocity_idx]
self.motors_position_lower = env.robot.position_limit_lower[
motors_position_idx] + soft_position_margin
self.motors_position_upper = env.robot.position_limit_upper[
motors_position_idx] - soft_position_margin
self.motors_velocity_limit = np.minimum(env.robot.velocity_limit[
env.robot.motors_velocity_idx], soft_velocity_max)
self.motors_effort_limit = env.robot.command_limit[
env.robot.motors_velocity_idx]
self.motors_effort_limit[
self.motors_position_lower > self.motors_position_upper] = 0.0

# Extract measured motor positions and velocities for fast access
self.q_measured, self.v_measured = env.sensors_data[encoder.type]
Expand Down Expand Up @@ -160,7 +160,7 @@ def compute_command(self, action: np.ndarray) -> np.ndarray:
v_measured,
self.kp,
self.kd,
self.motors_soft_position_lower,
self.motors_soft_position_upper,
self.motors_position_lower,
self.motors_position_upper,
self.motors_velocity_limit,
self.motors_effort_limit)
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
with gym_jiminy reinforcement learning pipeline environment design.
"""
import math
from typing import Any, List, Union
from typing import List, Union

import numpy as np
import numba as nb
Expand Down Expand Up @@ -212,8 +212,7 @@ def __init__(self,
kp: Union[float, List[float], np.ndarray],
kd: Union[float, List[float], np.ndarray],
target_position_margin: float = 0.0,
target_velocity_limit: float = float("inf"),
**kwargs: Any) -> None:
target_velocity_limit: float = float("inf")) -> None:
"""
:param name: Name of the block.
:param env: Environment to connect with.
Expand All @@ -225,8 +224,6 @@ def __init__(self,
:param target_position_margin: Minimum distance of the motor target
positions from their respective bounds.
:param target_velocity_limit: Maximum motor target velocities.
:param kwargs: Used arguments to allow automatic pipeline wrapper
generation.
"""
# Make sure that the specified derivative order is valid
assert (0 < order < 4), "Derivative order of command out-of-bounds"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@
imu.type: np.array([0.01, 0.01, 0.01, 0.02, 0.02, 0.02, 0.0, 0.0, 0.0])
}

DEFAULT_SIMULATION_DURATION = 20.0 # (s) Default simulation duration
DEFAULT_STEP_DT = 0.001 # (s) Stepper update period
DEFAULT_SIMULATION_DURATION = 30.0 # (s) Default simulation duration
DEFAULT_STEP_DT = 0.04 # (s) Stepper update period

DEFAULT_HLC_TO_LLC_RATIO = 1 # (NA)

Expand Down
12 changes: 5 additions & 7 deletions python/gym_jiminy/envs/gym_jiminy/envs/anymal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import sys
from typing import Any

import numpy as np

from gym_jiminy.common.envs import WalkerJiminyEnv
from gym_jiminy.common.blocks import PDController, MahonyFilter
from gym_jiminy.common.pipeline import build_pipeline
Expand All @@ -25,11 +23,11 @@
STEP_DT = 0.04

# PID proportional gains (one per actuated joint)
PD_KP = np.array([1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0,
1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0])
PD_KP = (1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0,
1500.0, 1500.0, 1500.0, 1500.0, 1500.0, 1500.0)
# PID derivative gains (one per actuated joint)
PD_KD = np.array([0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
0.01, 0.01, 0.01, 0.01, 0.01, 0.01])
PD_KD = (0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
0.01, 0.01, 0.01, 0.01, 0.01, 0.01)

# Mahony filter proportional and derivative gains
MAHONY_KP = 1.0
Expand All @@ -39,7 +37,7 @@
REWARD_MIXTURE = {
'direction': 0.0,
'energy': 0.0,
'done': 1.0
'survival': 1.0
}
# Standard deviation ratio of each individual origin of randomness
STD_RATIO = {
Expand Down
49 changes: 15 additions & 34 deletions python/gym_jiminy/envs/gym_jiminy/envs/atlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
from pinocchio import neutral, buildReducedModel

from gym_jiminy.common.envs import WalkerJiminyEnv
from gym_jiminy.common.blocks import MotorSafetyLimit, PDController, MahonyFilter
from gym_jiminy.common.blocks import (MotorSafetyLimit,
PDController,
MahonyFilter)
from gym_jiminy.common.pipeline import build_pipeline
from gym_jiminy.toolbox.math import ConvexHull

Expand All @@ -34,22 +36,21 @@
# Stepper update period (:float [s])
STEP_DT = 0.04

MOTOR_SAFETY_KP = 50.0
MOTOR_SAFETY_KD = 4.0
MOTOR_VELOCITY_MAX = 3.0

# PID proportional gains (one per actuated joint)
PD_REDUCED_KP = np.array([
PD_REDUCED_KP = (
# Left leg: [HpX, HpZ, HpY, KnY, AkY, AkX]
5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0,
# Right leg: [HpX, HpZ, HpY, KnY, AkY, AkX]
5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0])
PD_REDUCED_KD = np.array([
5000.0, 5000.0, 8000.0, 4000.0, 8000.0, 5000.0)
PD_REDUCED_KD = (
# Left leg: [HpX, HpZ, HpY, KnY, AkY, AkX]
0.02, 0.01, 0.015, 0.01, 0.015, 0.01,
# Right leg: [HpX, HpZ, HpY, KnY, AkY, AkX]
0.02, 0.01, 0.015, 0.01, 0.015, 0.01])
0.02, 0.01, 0.015, 0.01, 0.015, 0.01)

PD_FULL_KP = np.array([
PD_FULL_KP = (
# Neck: [Y]
100.0,
# Back: [Z, Y, X]
Expand All @@ -59,8 +60,8 @@
# Right arm: [ShZ, ShX, ElY, ElX, WrY, WrX, WrY2]
500.0, 100.0, 200.0, 500.0, 10.0, 100.0, 10.0,
# Lower body motors
*PD_REDUCED_KP])
PD_FULL_KD = np.array([
*PD_REDUCED_KP)
PD_FULL_KD = (
# Neck: [Y]
0.01,
# Back: [Z, Y, X]
Expand All @@ -70,7 +71,7 @@
# Right arm: [ShZ, ShX, ElY, ElX, WrY, WrX, WrY2]
0.01, 0.01, 0.01, 0.02, 0.01, 0.02, 0.02,
# Lower body motors
*PD_REDUCED_KD])
*PD_REDUCED_KD)

# Mahony filter proportional and derivative gains
# See: https://cas.mines-paristech.fr/~petit/papers/ral22/main.pdf
Expand All @@ -81,7 +82,7 @@
REWARD_MIXTURE = {
'direction': 0.0,
'energy': 0.0,
'done': 1.0
'survival': 1.0
}
# Standard deviation ratio of each individual origin of randomness
STD_RATIO = {
Expand Down Expand Up @@ -239,16 +240,6 @@ def joint_position_idx(joint_name: str) -> int:
cls=AtlasJiminyEnv
),
layers_config=[
dict(
block=dict(
cls=MotorSafetyLimit,
kwargs=dict(
kp=MOTOR_SAFETY_KP,
kd=MOTOR_SAFETY_KD,
soft_position_margin=0.0
)
),
),
dict(
block=dict(
cls=PDController,
Expand All @@ -258,7 +249,7 @@ def joint_position_idx(joint_name: str) -> int:
kp=PD_FULL_KP,
kd=PD_FULL_KD,
target_position_margin=0.0,
target_velocity_limit=float("inf")
target_velocity_limit=MOTOR_VELOCITY_MAX,
)
),
wrapper=dict(
Expand All @@ -284,16 +275,6 @@ def joint_position_idx(joint_name: str) -> int:
cls=AtlasReducedJiminyEnv
),
layers_config=[
dict(
block=dict(
cls=MotorSafetyLimit,
kwargs=dict(
kp=MOTOR_SAFETY_KP,
kd=MOTOR_SAFETY_KD,
soft_position_margin=0.0
)
),
),
dict(
block=dict(
cls=PDController,
Expand All @@ -303,7 +284,7 @@ def joint_position_idx(joint_name: str) -> int:
kp=PD_REDUCED_KP,
kd=PD_REDUCED_KD,
target_position_margin=0.0,
target_velocity_limit=float("inf")
target_velocity_limit=MOTOR_VELOCITY_MAX,
)
),
wrapper=dict(
Expand Down
10 changes: 5 additions & 5 deletions python/gym_jiminy/envs/gym_jiminy/envs/cassie.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@
STEP_DT = 0.04

# PID proportional gains (one per actuated joint)
PD_KP = np.array([50.0, 50.0, 50.0, 80.0, 8.0,
50.0, 50.0, 50.0, 80.0, 8.0])
PD_KP = (50.0, 50.0, 50.0, 80.0, 8.0,
50.0, 50.0, 50.0, 80.0, 8.0)
# PID derivative gains (one per actuated joint)
PD_KD = np.array([0.01, 0.02, 0.02, 0.03, 0.02,
0.01, 0.02, 0.02, 0.03, 0.02])
PD_KD = (0.01, 0.02, 0.02, 0.03, 0.02,
0.01, 0.02, 0.02, 0.03, 0.02)

# Mahony filter proportional and derivative gains
MAHONY_KP = 1.0
Expand All @@ -55,7 +55,7 @@
REWARD_MIXTURE = {
'direction': 0.0,
'energy': 0.0,
'done': 1.0
'survival': 1.0
}
# Standard deviation ratio of each individual origin of randomness
STD_RATIO = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@


if __name__ == '__main__':
# Fix weird issue with multiprocessing
__spec__ = None

# Enforce seed for most common libraries
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
random.seed(SEED)
Expand Down
13 changes: 7 additions & 6 deletions python/gym_jiminy/unit_py/test_pipeline_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,19 +162,20 @@ def test_pid_controller(self):
assert isinstance(controller, PDController) and controller.order == 1
ctrl_name = controller.name
n_motors = len(controller.fieldnames)
p = env.log_data["variables"][".".join((
pos = env.log_data["variables"][".".join((
"HighLevelController", ctrl_name, "state", str(n_motors - 1)))]
v = env.log_data["variables"][".".join((
vel = env.log_data["variables"][".".join((
"HighLevelController", ctrl_name, controller.fieldnames[-1]))]

# Make sure that the position and velocity targets are consistent
self.assertTrue(np.allclose(
np.diff(p) / controller.control_dt, v[1:], atol=TOLERANCE))
np.diff(pos) / controller.control_dt, vel[1:], atol=TOLERANCE))

# Make sure that the position and velocity targets are within bounds
# Make sure that the position targets are within bounds.
# No such guarantee can be provided for higher-order derivatives.
robot = env.robot
p_min = robot.position_limit_lower[robot.motors_position_idx[-1]]
p_max = robot.position_limit_upper[robot.motors_position_idx[-1]]
self.assertTrue(np.all(np.logical_and(p_min < p, p < p_max)))
v_max = robot.velocity_limit[robot.motors_velocity_idx[-1]]
self.assertTrue(np.all(np.logical_and(-v_max < v, v < v_max)))
# v_max = robot.velocity_limit[robot.motors_velocity_idx[-1]]
# self.assertTrue(np.all(np.logical_and(-v_max < v, v < v_max)))
10 changes: 7 additions & 3 deletions python/gym_jiminy/unit_py/test_pipeline_design.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from gym_jiminy.common.bases import JiminyEnvInterface


TOLERANCE = 1.0e-6

class PipelineDesign(unittest.TestCase):
""" TODO: Write documentation
"""
Expand Down Expand Up @@ -155,8 +157,10 @@ def test_step_state(self):

# Observation stacking is skipping the required number of frames
stack_dt = (self.skip_frames_ratio + 1) * env.observe_dt
for i in range(3):
self.assertEqual(obs['t'][i], i * stack_dt)
t_obs_last = env.step_dt - env.step_dt % stack_dt
for i in range(self.num_stack):
self.assertTrue(np.isclose(
obs['t'][::-1][i], t_obs_last - i * stack_dt, TOLERANCE))

# Initial observation is consistent with internal simulator state
controller_target_obs = obs['actions']['controller_0']
Expand All @@ -175,7 +179,7 @@ def test_step_state(self):
obs, *_ = env.step(action)
for i, t in enumerate(np.flip(obs['t'])):
self.assertTrue(np.isclose(
t, n_steps_breakpoint * env.step_dt - i * stack_dt, 1.0e-6))
t, n_steps_breakpoint * env.step_dt - i * stack_dt, TOLERANCE))
imu_data_ref = env.simulator.robot.sensors_data['ImuSensor']
imu_data_obs = obs['measurements']['ImuSensor'][-1]
self.assertTrue(np.all(imu_data_ref == imu_data_obs))
Expand Down

0 comments on commit 504c6dd

Please sign in to comment.