WATonomous · wilsonchenghy · Mar 15, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/...manoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/cabinet/cabinet_env_cfg.py b/...manoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/cabinet/cabinet_env_cfg.py
@@ -49,8 +49,6 @@ class CabinetSceneCfg(InteractiveSceneCfg):
 
     # robots, Will be populated by agent env cfg
     robot: ArticulationCfg = MISSING
-    # End-effector, Will be populated by agent env cfg
-    ee_frame: FrameTransformerCfg = MISSING
 
     cabinet = ArticulationCfg(
         prim_path="{ENV_REGEX_NS}/Cabinet",
@@ -180,8 +178,8 @@ class EventCfg:
         mode="startup",
         params={
             "asset_cfg": SceneEntityCfg("cabinet", body_names="drawer_handle_top"),
-            "static_friction_range": (1.0, 1.25),
-            "dynamic_friction_range": (1.25, 1.5),
+            "static_friction_range": (2.0, 2.5),
+            "dynamic_friction_range": (2.0, 2.25),
             "restitution_range": (0.0, 0.0),
             "num_buckets": 16,
         },
@@ -223,7 +221,7 @@ class RewardsCfg:
     # 3. Open the drawer
     open_drawer_bonus = RewTerm(
         func=mdp.open_drawer_bonus,
-        weight=7.5,
+        weight=8.0,
         params={"asset_cfg": SceneEntityCfg("cabinet", joint_names=["drawer_top_joint"])},
     )
     multi_stage_open_drawer = RewTerm(
@@ -276,3 +274,8 @@ def __post_init__(self):
         self.sim.physx.bounce_threshold_velocity = 0.2
         self.sim.physx.bounce_threshold_velocity = 0.01
         self.sim.physx.friction_correlation_distance = 0.00625
+
+
+# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/train.py --task=Isaac-Open-Drawer-Humanoid-Arm-v0 --headless
+
+# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/play.py --task=Isaac-Open-Drawer-Humanoid-Arm-v0 --num_envs=1
diff --git a/...noidRLPackage/HumanoidRLSetup/tasks/cabinet/config/HumanoidRLEnv/agents/rsl_rl_ppo_cfg.py b/...noidRLPackage/HumanoidRLSetup/tasks/cabinet/config/HumanoidRLEnv/agents/rsl_rl_ppo_cfg.py
@@ -15,8 +15,8 @@ class HumanoidArmCabinetPPORunnerCfg(RslRlOnPolicyRunnerCfg):
     empirical_normalization = False
     policy = RslRlPpoActorCriticCfg(
         init_noise_std=1.0,
-        actor_obs_normalization=False,
-        critic_obs_normalization=False,
+        # actor_obs_normalization=False,
+        # critic_obs_normalization=False,
         actor_hidden_dims=[256, 128, 64],
         critic_hidden_dims=[256, 128, 64],
         activation="elu",

diff --git a/...HumanoidRLPackage/HumanoidRLSetup/tasks/cabinet/config/HumanoidRLEnv/joint_pos_env_cfg.py b/...HumanoidRLPackage/HumanoidRLSetup/tasks/cabinet/config/HumanoidRLEnv/joint_pos_env_cfg.py
@@ -1,12 +1,7 @@
-from isaaclab.sensors import FrameTransformerCfg
-from isaaclab.sensors.frame_transformer.frame_transformer_cfg import OffsetCfg
 from isaaclab.utils import configclass
 
 from HumanoidRLPackage.HumanoidRLSetup.tasks.cabinet import mdp
-from HumanoidRLPackage.HumanoidRLSetup.tasks.cabinet.cabinet_env_cfg import (
-    CabinetEnvCfg,
-    FRAME_MARKER_SMALL_CFG,
-)
+from HumanoidRLPackage.HumanoidRLSetup.tasks.cabinet.cabinet_env_cfg import CabinetEnvCfg
 from HumanoidRLPackage.HumanoidRLSetup.modelCfg.humanoid import ARM_CFG
 
 
@@ -17,8 +12,8 @@ class HumanoidArmCabinetEnvCfg(CabinetEnvCfg):
     def __post_init__(self):
         super().__post_init__()
 
-        # Robot: humanoid arm (same as manipulation/reach)
         self.scene.robot = ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+        self.scene.robot.init_state.pos = (-0.1, 0.0, 0.4)
 
         # Arm action: all joints (arm + hand), same as manipulation
         self.actions.arm_action = mdp.JointPositionActionCfg(
@@ -35,41 +30,21 @@ def __post_init__(self):
             close_command_expr={"mcp_.*": 0.0},
         )
 
-        # End-effector frame: TCP + two fingertips for approach/grasp rewards
-        # Uses same body naming as manipulation (DIP_INDEX, IP_THUMB)
-        self.scene.ee_frame = FrameTransformerCfg(
-            prim_path="{ENV_REGEX_NS}/Robot",
-            debug_vis=False,
-            visualizer_cfg=FRAME_MARKER_SMALL_CFG.replace(
-                prim_path="/Visuals/EndEffectorFrameTransformer"
-            ),
-            target_frames=[
-                FrameTransformerCfg.FrameCfg(
-                    prim_path="{ENV_REGEX_NS}/Robot/wrist_extension",
-                    name="ee_tcp",
-                    offset=OffsetCfg(pos=(0.0, 0.0, 0.0)),
-                ),
-                FrameTransformerCfg.FrameCfg(
-                    prim_path="{ENV_REGEX_NS}/Robot/DIP_INDEX_v1_0",
-                    name="tool_leftfinger",
-                    offset=OffsetCfg(pos=(0.0, 0.0, 0.0)),
-                ),
-                FrameTransformerCfg.FrameCfg(
-                    prim_path="{ENV_REGEX_NS}/Robot/IP_THUMB_v1_0",
-                    name="tool_rightfinger",
-                    offset=OffsetCfg(pos=(0.0, 0.0, 0.0)),
-                ),
-            ],
-        )
 
-        # Cabinet pose for humanoid arm workspace (similar to openarm)
-        self.scene.cabinet.spawn.scale = (0.75, 0.75, 0.75)
-        self.scene.cabinet.init_state.pos = (0.7, 0.0, 0.3)
+        # Cabinet scale 1.15 so handle gap is large enough for humanoid fingers.
+        self.scene.cabinet.spawn.scale = (1.15, 1.15, 1.15)
+        self.scene.cabinet.init_state.pos = (0.75, 0.0, 0.35)
 
-        # Reward params: gripper offset and grasp open position (MCP open = 0.5)
+        # Reward params: gripper offset and grasp open position (MCP open = 0.5).
+        # Grasp reward uses only the four fingers (index, middle, ring, pinky), not thumb.
         self.rewards.approach_gripper_handle.params["offset"] = 0.04
         self.rewards.grasp_handle.params["open_joint_pos"] = 0.5
-        self.rewards.grasp_handle.params["asset_cfg"].joint_names = ["mcp_.*"]
+        self.rewards.grasp_handle.params["asset_cfg"].joint_names = [
+            "mcp_index",
+            "mcp_middle",
+            "mcp_ring",
+            "mcp_pinky",
+        ]
 
 
 @configclass

diff --git a/...anoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/cabinet/mdp/observations.py b/...anoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/cabinet/mdp/observations.py
@@ -1,8 +1,3 @@
-# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
@@ -17,6 +12,15 @@
     from isaaclab.envs import ManagerBasedRLEnv
 
 
+def _robot_ee_pos(env: ManagerBasedRLEnv) -> torch.Tensor | None:
+    """End-effector position from robot articulation (body DIP_INDEX). Returns None if not ready."""
+    robot = env.scene["robot"]
+    ids, _ = robot.find_bodies("DIP_INDEX_v1_.*", preserve_order=True)
+    if not ids:
+        return None
+    return robot.data.body_pos_w[:, ids[0], :]
+
+
 def rel_ee_object_distance(env: ManagerBasedRLEnv) -> torch.Tensor:
     """The distance between the end-effector and the object."""
     ee_tf_data: FrameTransformerData = env.scene["ee_frame"].data
@@ -27,10 +31,12 @@ def rel_ee_object_distance(env: ManagerBasedRLEnv) -> torch.Tensor:
 
 def rel_ee_drawer_distance(env: ManagerBasedRLEnv) -> torch.Tensor:
     """The distance between the end-effector and the object."""
-    ee_tf_data: FrameTransformerData = env.scene["ee_frame"].data
+    ee_pos = _robot_ee_pos(env)
+    if ee_pos is None:
+        return torch.zeros(env.num_envs, 3, device=env.device)
     cabinet_tf_data: FrameTransformerData = env.scene["cabinet_frame"].data
-
-    return cabinet_tf_data.target_pos_w[..., 0, :] - ee_tf_data.target_pos_w[..., 0, :]
+    handle_pos = cabinet_tf_data.target_pos_w[..., 0, :]
+    return handle_pos - ee_pos
 
 
 def fingertips_pos(env: ManagerBasedRLEnv) -> torch.Tensor:

diff --git a/...n/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/cabinet/mdp/rewards.py b/...n/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/cabinet/mdp/rewards.py
@@ -1,8 +1,3 @@
-# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
@@ -16,6 +11,20 @@
     from isaaclab.envs import ManagerBasedRLEnv
 
 
+def _robot_ee_pose(env: ManagerBasedRLEnv):
+    """(ee_tcp_pos, ee_tcp_quat, lfinger_pos, rfinger_pos) from robot articulation bodies. Uses body name regex like reach."""
+    robot = env.scene["robot"]
+    ee_ids, _ = robot.find_bodies("DIP_INDEX_v1_.*", preserve_order=True)
+    thumb_ids, _ = robot.find_bodies("IP_THUMB_v1_.*", preserve_order=True)
+    if not ee_ids or not thumb_ids:
+        return None
+    ee_tcp_pos = robot.data.body_pos_w[:, ee_ids[0], :]
+    ee_tcp_quat = robot.data.body_quat_w[:, ee_ids[0], :]
+    lfinger_pos = robot.data.body_pos_w[:, ee_ids[0], :]
+    rfinger_pos = robot.data.body_pos_w[:, thumb_ids[0], :]
+    return (ee_tcp_pos, ee_tcp_quat, lfinger_pos, rfinger_pos)
+
+
 def approach_ee_handle(env: ManagerBasedRLEnv, threshold: float) -> torch.Tensor:
     r"""Reward the robot for reaching the drawer handle using inverse-square law.
 
@@ -29,87 +38,59 @@ def approach_ee_handle(env: ManagerBasedRLEnv, threshold: float) -> torch.Tensor
         \end{cases}
 
     """
-    ee_tcp_pos = env.scene["ee_frame"].data.target_pos_w[..., 0, :]
+    pose = _robot_ee_pose(env)
+    if pose is None:
+        return torch.zeros(env.num_envs, device=env.device)
+    ee_tcp_pos, _, _, _ = pose
     handle_pos = env.scene["cabinet_frame"].data.target_pos_w[..., 0, :]
 
-    # Compute the distance of the end-effector to the handle
     distance = torch.norm(handle_pos - ee_tcp_pos, dim=-1, p=2)
-
-    # Reward the robot for reaching the handle
     reward = 1.0 / (1.0 + distance**2)
     reward = torch.pow(reward, 2)
     return torch.where(distance <= threshold, 2 * reward, reward)
 
 
 def align_ee_handle(env: ManagerBasedRLEnv) -> torch.Tensor:
-    """Reward for aligning the end-effector with the handle.
-
-    The reward is based on the alignment of the gripper with the handle. It is computed as follows:
-
-    .. math::
-
-        reward = 0.5 * (align_z^2 + align_x^2)
-
-    where :math:`align_z` is the dot product of the z direction of the gripper and the -x direction of the handle
-    and :math:`align_x` is the dot product of the x direction of the gripper and the -y direction of the handle.
-    """
-    ee_tcp_quat = env.scene["ee_frame"].data.target_quat_w[..., 0, :]
+    """Reward for aligning the end-effector with the handle."""
+    pose = _robot_ee_pose(env)
+    if pose is None:
+        return torch.zeros(env.num_envs, device=env.device)
+    _, ee_tcp_quat, _, _ = pose
     handle_quat = env.scene["cabinet_frame"].data.target_quat_w[..., 0, :]
 
     ee_tcp_rot_mat = matrix_from_quat(ee_tcp_quat)
     handle_mat = matrix_from_quat(handle_quat)
 
-    # get current x and y direction of the handle
     handle_x, handle_y = handle_mat[..., 0], handle_mat[..., 1]
-    # get current x and z direction of the gripper
     ee_tcp_x, ee_tcp_z = ee_tcp_rot_mat[..., 0], ee_tcp_rot_mat[..., 2]
 
-    # make sure gripper aligns with the handle
-    # in this case, the z direction of the gripper should be close to the -x direction of the handle
-    # and the x direction of the gripper should be close to the -y direction of the handle
-    # dot product of z and x should be large
     align_z = torch.bmm(ee_tcp_z.unsqueeze(1), -handle_x.unsqueeze(-1)).squeeze(-1).squeeze(-1)
     align_x = torch.bmm(ee_tcp_x.unsqueeze(1), -handle_y.unsqueeze(-1)).squeeze(-1).squeeze(-1)
     return 0.5 * (torch.sign(align_z) * align_z**2 + torch.sign(align_x) * align_x**2)
 
 
 def align_grasp_around_handle(env: ManagerBasedRLEnv) -> torch.Tensor:
-    """Bonus for correct hand orientation around the handle.
-
-    The correct hand orientation is when the left finger is above the handle and the right finger is below the handle.
-    """
-    # Target object position: (num_envs, 3)
+    """Bonus for correct hand orientation around the handle."""
     handle_pos = env.scene["cabinet_frame"].data.target_pos_w[..., 0, :]
-    # Fingertips position: (num_envs, n_fingertips, 3)
-    ee_fingertips_w = env.scene["ee_frame"].data.target_pos_w[..., 1:, :]
-    lfinger_pos = ee_fingertips_w[..., 0, :]
-    rfinger_pos = ee_fingertips_w[..., 1, :]
+    pose = _robot_ee_pose(env)
+    if pose is None:
+        return torch.zeros(env.num_envs, device=env.device, dtype=torch.bool)
+    _, _, lfinger_pos, rfinger_pos = pose
 
-    # Check if hand is in a graspable pose
     is_graspable = (rfinger_pos[:, 2] < handle_pos[:, 2]) & (lfinger_pos[:, 2] > handle_pos[:, 2])
-
-    # bonus if left finger is above the drawer handle and right below
     return is_graspable
 
 
 def approach_gripper_handle(env: ManagerBasedRLEnv, offset: float = 0.04) -> torch.Tensor:
-    """Reward the robot's gripper reaching the drawer handle with the right pose.
-
-    This function returns the distance of fingertips to the handle when the fingers are in a grasping orientation
-    (i.e., the left finger is above the handle and the right finger is below the handle). Otherwise, it returns zero.
-    """
-    # Target object position: (num_envs, 3)
+    """Reward the robot's gripper reaching the drawer handle with the right pose."""
     handle_pos = env.scene["cabinet_frame"].data.target_pos_w[..., 0, :]
-    # Fingertips position: (num_envs, n_fingertips, 3)
-    ee_fingertips_w = env.scene["ee_frame"].data.target_pos_w[..., 1:, :]
-    lfinger_pos = ee_fingertips_w[..., 0, :]
-    rfinger_pos = ee_fingertips_w[..., 1, :]
+    pose = _robot_ee_pose(env)
+    if pose is None:
+        return torch.zeros(env.num_envs, device=env.device)
+    _, _, lfinger_pos, rfinger_pos = pose
 
-    # Compute the distance of each finger from the handle
     lfinger_dist = torch.abs(lfinger_pos[:, 2] - handle_pos[:, 2])
     rfinger_dist = torch.abs(rfinger_pos[:, 2] - handle_pos[:, 2])
-
-    # Check if hand is in a graspable pose
     is_graspable = (rfinger_pos[:, 2] < handle_pos[:, 2]) & (lfinger_pos[:, 2] > handle_pos[:, 2])
 
     return is_graspable * ((offset - lfinger_dist) + (offset - rfinger_dist))
@@ -126,7 +107,10 @@ def grasp_handle(
     Note:
         It is assumed that zero joint position corresponds to the fingers being closed.
     """
-    ee_tcp_pos = env.scene["ee_frame"].data.target_pos_w[..., 0, :]
+    pose = _robot_ee_pose(env)
+    if pose is None:
+        return torch.zeros(env.num_envs, device=env.device)
+    ee_tcp_pos, _, _, _ = pose
     handle_pos = env.scene["cabinet_frame"].data.target_pos_w[..., 0, :]
     gripper_joint_pos = env.scene[asset_cfg.name].data.joint_pos[:, asset_cfg.joint_ids]
 

diff --git a/...umanoidRLPackage/HumanoidRLSetup/tasks/lift/config/HumanoidRLEnv/agents/rsl_rl_ppo_cfg.py b/...umanoidRLPackage/HumanoidRLSetup/tasks/lift/config/HumanoidRLEnv/agents/rsl_rl_ppo_cfg.py
@@ -15,8 +15,8 @@ class HumanoidArmLiftPPORunnerCfg(RslRlOnPolicyRunnerCfg):
     empirical_normalization = False
     policy = RslRlPpoActorCriticCfg(
         init_noise_std=1.0,
-        actor_obs_normalization=False,
-        critic_obs_normalization=False,
+        # actor_obs_normalization=False,
+        # critic_obs_normalization=False,
         actor_hidden_dims=[256, 128, 64],
         critic_hidden_dims=[256, 128, 64],
         activation="elu",