Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ class CabinetSceneCfg(InteractiveSceneCfg):

# robots, Will be populated by agent env cfg
robot: ArticulationCfg = MISSING
# End-effector, Will be populated by agent env cfg
ee_frame: FrameTransformerCfg = MISSING

cabinet = ArticulationCfg(
prim_path="{ENV_REGEX_NS}/Cabinet",
Expand Down Expand Up @@ -180,8 +178,8 @@ class EventCfg:
mode="startup",
params={
"asset_cfg": SceneEntityCfg("cabinet", body_names="drawer_handle_top"),
"static_friction_range": (1.0, 1.25),
"dynamic_friction_range": (1.25, 1.5),
"static_friction_range": (2.0, 2.5),
"dynamic_friction_range": (2.0, 2.25),
"restitution_range": (0.0, 0.0),
"num_buckets": 16,
},
Expand Down Expand Up @@ -223,7 +221,7 @@ class RewardsCfg:
# 3. Open the drawer
open_drawer_bonus = RewTerm(
func=mdp.open_drawer_bonus,
weight=7.5,
weight=8.0,
params={"asset_cfg": SceneEntityCfg("cabinet", joint_names=["drawer_top_joint"])},
)
multi_stage_open_drawer = RewTerm(
Expand Down Expand Up @@ -276,3 +274,8 @@ def __post_init__(self):
self.sim.physx.bounce_threshold_velocity = 0.2
self.sim.physx.bounce_threshold_velocity = 0.01
self.sim.physx.friction_correlation_distance = 0.00625


# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/train.py --task=Isaac-Open-Drawer-Humanoid-Arm-v0 --headless

# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/play.py --task=Isaac-Open-Drawer-Humanoid-Arm-v0 --num_envs=1
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class HumanoidArmCabinetPPORunnerCfg(RslRlOnPolicyRunnerCfg):
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
from isaaclab.sensors import FrameTransformerCfg
from isaaclab.sensors.frame_transformer.frame_transformer_cfg import OffsetCfg
from isaaclab.utils import configclass

from HumanoidRLPackage.HumanoidRLSetup.tasks.cabinet import mdp
from HumanoidRLPackage.HumanoidRLSetup.tasks.cabinet.cabinet_env_cfg import (
CabinetEnvCfg,
FRAME_MARKER_SMALL_CFG,
)
from HumanoidRLPackage.HumanoidRLSetup.tasks.cabinet.cabinet_env_cfg import CabinetEnvCfg
from HumanoidRLPackage.HumanoidRLSetup.modelCfg.humanoid import ARM_CFG


Expand All @@ -17,8 +12,8 @@ class HumanoidArmCabinetEnvCfg(CabinetEnvCfg):
def __post_init__(self):
super().__post_init__()

# Robot: humanoid arm (same as manipulation/reach)
self.scene.robot = ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
self.scene.robot.init_state.pos = (-0.1, 0.0, 0.4)

# Arm action: all joints (arm + hand), same as manipulation
self.actions.arm_action = mdp.JointPositionActionCfg(
Expand All @@ -35,41 +30,21 @@ def __post_init__(self):
close_command_expr={"mcp_.*": 0.0},
)

# End-effector frame: TCP + two fingertips for approach/grasp rewards
# Uses same body naming as manipulation (DIP_INDEX, IP_THUMB)
self.scene.ee_frame = FrameTransformerCfg(
prim_path="{ENV_REGEX_NS}/Robot",
debug_vis=False,
visualizer_cfg=FRAME_MARKER_SMALL_CFG.replace(
prim_path="/Visuals/EndEffectorFrameTransformer"
),
target_frames=[
FrameTransformerCfg.FrameCfg(
prim_path="{ENV_REGEX_NS}/Robot/wrist_extension",
name="ee_tcp",
offset=OffsetCfg(pos=(0.0, 0.0, 0.0)),
),
FrameTransformerCfg.FrameCfg(
prim_path="{ENV_REGEX_NS}/Robot/DIP_INDEX_v1_0",
name="tool_leftfinger",
offset=OffsetCfg(pos=(0.0, 0.0, 0.0)),
),
FrameTransformerCfg.FrameCfg(
prim_path="{ENV_REGEX_NS}/Robot/IP_THUMB_v1_0",
name="tool_rightfinger",
offset=OffsetCfg(pos=(0.0, 0.0, 0.0)),
),
],
)

# Cabinet pose for humanoid arm workspace (similar to openarm)
self.scene.cabinet.spawn.scale = (0.75, 0.75, 0.75)
self.scene.cabinet.init_state.pos = (0.7, 0.0, 0.3)
# Cabinet scale 1.15 so handle gap is large enough for humanoid fingers.
self.scene.cabinet.spawn.scale = (1.15, 1.15, 1.15)
self.scene.cabinet.init_state.pos = (0.75, 0.0, 0.35)

# Reward params: gripper offset and grasp open position (MCP open = 0.5)
# Reward params: gripper offset and grasp open position (MCP open = 0.5).
# Grasp reward uses only the four fingers (index, middle, ring, pinky), not thumb.
self.rewards.approach_gripper_handle.params["offset"] = 0.04
self.rewards.grasp_handle.params["open_joint_pos"] = 0.5
self.rewards.grasp_handle.params["asset_cfg"].joint_names = ["mcp_.*"]
self.rewards.grasp_handle.params["asset_cfg"].joint_names = [
"mcp_index",
"mcp_middle",
"mcp_ring",
"mcp_pinky",
]


@configclass
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

from typing import TYPE_CHECKING
Expand All @@ -17,6 +12,15 @@
from isaaclab.envs import ManagerBasedRLEnv


def _robot_ee_pos(env: ManagerBasedRLEnv) -> torch.Tensor | None:
"""End-effector position from robot articulation (body DIP_INDEX). Returns None if not ready."""
robot = env.scene["robot"]
ids, _ = robot.find_bodies("DIP_INDEX_v1_.*", preserve_order=True)
if not ids:
return None
return robot.data.body_pos_w[:, ids[0], :]


def rel_ee_object_distance(env: ManagerBasedRLEnv) -> torch.Tensor:
"""The distance between the end-effector and the object."""
ee_tf_data: FrameTransformerData = env.scene["ee_frame"].data
Expand All @@ -27,10 +31,12 @@ def rel_ee_object_distance(env: ManagerBasedRLEnv) -> torch.Tensor:

def rel_ee_drawer_distance(env: ManagerBasedRLEnv) -> torch.Tensor:
"""The distance between the end-effector and the object."""
ee_tf_data: FrameTransformerData = env.scene["ee_frame"].data
ee_pos = _robot_ee_pos(env)
if ee_pos is None:
return torch.zeros(env.num_envs, 3, device=env.device)
cabinet_tf_data: FrameTransformerData = env.scene["cabinet_frame"].data

return cabinet_tf_data.target_pos_w[..., 0, :] - ee_tf_data.target_pos_w[..., 0, :]
handle_pos = cabinet_tf_data.target_pos_w[..., 0, :]
return handle_pos - ee_pos


def fingertips_pos(env: ManagerBasedRLEnv) -> torch.Tensor:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

from typing import TYPE_CHECKING
Expand All @@ -16,6 +11,20 @@
from isaaclab.envs import ManagerBasedRLEnv


def _robot_ee_pose(env: ManagerBasedRLEnv):
"""(ee_tcp_pos, ee_tcp_quat, lfinger_pos, rfinger_pos) from robot articulation bodies. Uses body name regex like reach."""
robot = env.scene["robot"]
ee_ids, _ = robot.find_bodies("DIP_INDEX_v1_.*", preserve_order=True)
thumb_ids, _ = robot.find_bodies("IP_THUMB_v1_.*", preserve_order=True)
if not ee_ids or not thumb_ids:
return None
ee_tcp_pos = robot.data.body_pos_w[:, ee_ids[0], :]
ee_tcp_quat = robot.data.body_quat_w[:, ee_ids[0], :]
lfinger_pos = robot.data.body_pos_w[:, ee_ids[0], :]
rfinger_pos = robot.data.body_pos_w[:, thumb_ids[0], :]
return (ee_tcp_pos, ee_tcp_quat, lfinger_pos, rfinger_pos)


def approach_ee_handle(env: ManagerBasedRLEnv, threshold: float) -> torch.Tensor:
r"""Reward the robot for reaching the drawer handle using inverse-square law.

Expand All @@ -29,87 +38,59 @@ def approach_ee_handle(env: ManagerBasedRLEnv, threshold: float) -> torch.Tensor
\end{cases}

"""
ee_tcp_pos = env.scene["ee_frame"].data.target_pos_w[..., 0, :]
pose = _robot_ee_pose(env)
if pose is None:
return torch.zeros(env.num_envs, device=env.device)
ee_tcp_pos, _, _, _ = pose
handle_pos = env.scene["cabinet_frame"].data.target_pos_w[..., 0, :]

# Compute the distance of the end-effector to the handle
distance = torch.norm(handle_pos - ee_tcp_pos, dim=-1, p=2)

# Reward the robot for reaching the handle
reward = 1.0 / (1.0 + distance**2)
reward = torch.pow(reward, 2)
return torch.where(distance <= threshold, 2 * reward, reward)


def align_ee_handle(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Reward for aligning the end-effector with the handle.

The reward is based on the alignment of the gripper with the handle. It is computed as follows:

.. math::

reward = 0.5 * (align_z^2 + align_x^2)

where :math:`align_z` is the dot product of the z direction of the gripper and the -x direction of the handle
and :math:`align_x` is the dot product of the x direction of the gripper and the -y direction of the handle.
"""
ee_tcp_quat = env.scene["ee_frame"].data.target_quat_w[..., 0, :]
"""Reward for aligning the end-effector with the handle."""
pose = _robot_ee_pose(env)
if pose is None:
return torch.zeros(env.num_envs, device=env.device)
_, ee_tcp_quat, _, _ = pose
handle_quat = env.scene["cabinet_frame"].data.target_quat_w[..., 0, :]

ee_tcp_rot_mat = matrix_from_quat(ee_tcp_quat)
handle_mat = matrix_from_quat(handle_quat)

# get current x and y direction of the handle
handle_x, handle_y = handle_mat[..., 0], handle_mat[..., 1]
# get current x and z direction of the gripper
ee_tcp_x, ee_tcp_z = ee_tcp_rot_mat[..., 0], ee_tcp_rot_mat[..., 2]

# make sure gripper aligns with the handle
# in this case, the z direction of the gripper should be close to the -x direction of the handle
# and the x direction of the gripper should be close to the -y direction of the handle
# dot product of z and x should be large
align_z = torch.bmm(ee_tcp_z.unsqueeze(1), -handle_x.unsqueeze(-1)).squeeze(-1).squeeze(-1)
align_x = torch.bmm(ee_tcp_x.unsqueeze(1), -handle_y.unsqueeze(-1)).squeeze(-1).squeeze(-1)
return 0.5 * (torch.sign(align_z) * align_z**2 + torch.sign(align_x) * align_x**2)


def align_grasp_around_handle(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Bonus for correct hand orientation around the handle.

The correct hand orientation is when the left finger is above the handle and the right finger is below the handle.
"""
# Target object position: (num_envs, 3)
"""Bonus for correct hand orientation around the handle."""
handle_pos = env.scene["cabinet_frame"].data.target_pos_w[..., 0, :]
# Fingertips position: (num_envs, n_fingertips, 3)
ee_fingertips_w = env.scene["ee_frame"].data.target_pos_w[..., 1:, :]
lfinger_pos = ee_fingertips_w[..., 0, :]
rfinger_pos = ee_fingertips_w[..., 1, :]
pose = _robot_ee_pose(env)
if pose is None:
return torch.zeros(env.num_envs, device=env.device, dtype=torch.bool)
_, _, lfinger_pos, rfinger_pos = pose

# Check if hand is in a graspable pose
is_graspable = (rfinger_pos[:, 2] < handle_pos[:, 2]) & (lfinger_pos[:, 2] > handle_pos[:, 2])

# bonus if left finger is above the drawer handle and right below
return is_graspable


def approach_gripper_handle(env: ManagerBasedRLEnv, offset: float = 0.04) -> torch.Tensor:
"""Reward the robot's gripper reaching the drawer handle with the right pose.

This function returns the distance of fingertips to the handle when the fingers are in a grasping orientation
(i.e., the left finger is above the handle and the right finger is below the handle). Otherwise, it returns zero.
"""
# Target object position: (num_envs, 3)
"""Reward the robot's gripper reaching the drawer handle with the right pose."""
handle_pos = env.scene["cabinet_frame"].data.target_pos_w[..., 0, :]
# Fingertips position: (num_envs, n_fingertips, 3)
ee_fingertips_w = env.scene["ee_frame"].data.target_pos_w[..., 1:, :]
lfinger_pos = ee_fingertips_w[..., 0, :]
rfinger_pos = ee_fingertips_w[..., 1, :]
pose = _robot_ee_pose(env)
if pose is None:
return torch.zeros(env.num_envs, device=env.device)
_, _, lfinger_pos, rfinger_pos = pose

# Compute the distance of each finger from the handle
lfinger_dist = torch.abs(lfinger_pos[:, 2] - handle_pos[:, 2])
rfinger_dist = torch.abs(rfinger_pos[:, 2] - handle_pos[:, 2])

# Check if hand is in a graspable pose
is_graspable = (rfinger_pos[:, 2] < handle_pos[:, 2]) & (lfinger_pos[:, 2] > handle_pos[:, 2])

return is_graspable * ((offset - lfinger_dist) + (offset - rfinger_dist))
Expand All @@ -126,7 +107,10 @@ def grasp_handle(
Note:
It is assumed that zero joint position corresponds to the fingers being closed.
"""
ee_tcp_pos = env.scene["ee_frame"].data.target_pos_w[..., 0, :]
pose = _robot_ee_pose(env)
if pose is None:
return torch.zeros(env.num_envs, device=env.device)
ee_tcp_pos, _, _, _ = pose
handle_pos = env.scene["cabinet_frame"].data.target_pos_w[..., 0, :]
gripper_joint_pos = env.scene[asset_cfg.name].data.joint_pos[:, asset_cfg.joint_ids]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class HumanoidArmLiftPPORunnerCfg(RslRlOnPolicyRunnerCfg):
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_obs_normalization=False,
critic_obs_normalization=False,
# actor_obs_normalization=False,
# critic_obs_normalization=False,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
Expand Down
Loading
Loading