In [1]:
!pip install ninja "packaging>=24.2,<26.0"
!pip install peft
!pip install dm-tree==0.1.9
!pip install -U transformers
!pip install flash-attn==2.7.3 --no-build-isolation

Collecting transformers
  Using cached transformers-4.57.5-py3-none-any.whl.metadata (43 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Using cached tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Using cached transformers-4.57.5-py3-none-any.whl (12.0 MB)
Using cached tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.21.4
    Uninstalling tokenizers-0.21.4:
      Successfully uninstalled tokenizers-0.21.4
  Attempting uninstall: transformers
    Found existing installation: transformers 4.53.3
    Uninstalling transformers-4.53.3:
      Successfully uninstalled transformers-4.53.3
Successfully installed tokenizers-0.22.2 transformers-4.57.5


In [1]:
import random
import numpy as np
import os
import torch
import json
from PIL import Image
from src.env.env import RILAB_OMY_ENV
from torchvision import transforms

from lerobot.policies.groot.modeling_groot import GrootPolicy
from lerobot.policies.groot.processor_groot import make_groot_pre_post_processors
from lerobot.processor import PolicyAction, PolicyProcessorPipeline
from lerobot.processor.converters import (
    batch_to_transition,
    policy_action_to_transition,
    transition_to_batch,
    transition_to_policy_action,
)
from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME


import glfw

  from .autonotebook import tqdm as notebook_tqdm


## Load Model

In [3]:
'''
Meta data is for loading dataset statistics and feature information
'''
repo_id_or_path ='ckpt/tutorial_v2_grootn15/checkpoints/last/pretrained_model' #'Jeongeun/tutorial_v2_grootn15'
device = 'cuda'

# 
policy = GrootPolicy.from_pretrained(repo_id_or_path)
policy.to(device)

[GROOT] Flash Attention version: 2.7.3
Loading pretrained dual brain from nvidia/GR00T-N1.5-3B
Tune backbone vision tower: False
Tune backbone LLM: False
Tune action head projector: True
Tune action head DiT: True


Fetching 13 files: 100%|██████████| 13/13 [00:00<00:00, 217234.87it/s]
`torch_dtype` is deprecated! Use `dtype` instead!


[GROOT] Copying vendor Eagle files to cache: /home/rilab/ros2_ws/src/Lerobot-MujoCo-Custom-Policy-Tutorial/.venv/lib/python3.12/site-packages/lerobot/policies/groot/eagle2_hg_model -> /home/rilab/.cache/huggingface/lerobot/lerobot/eagle2hg-processor-groot-n1p5
[GROOT] Assets repo: lerobot/eagle2hg-processor-groot-n1p5 
 Cache dir: /home/rilab/.cache/huggingface/lerobot/lerobot/eagle2hg-processor-groot-n1p5
Tune backbone llm: False
Tune backbone visual: True
Total number of DiT parameters:  550386688
Total number of SelfAttentionTransformer parameters:  201433088
Tune action head projector: True
Tune action head diffusion model: True


Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00,  8.98it/s]


Tune backbone llm: False
Tune backbone visual: False
Tune action head projector: True
Tune action head diffusion model: True
Loading weights from local directory


GrootPolicy(
  (_groot_model): GR00TN15(
    (backbone): EagleBackbone(
      (eagle_model): Eagle25VLForConditionalGeneration(
        (vision_model): SiglipVisionModel(
          (vision_model): SiglipVisionTransformer(
            (embeddings): SiglipVisionEmbeddings(
              (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
              (position_embedding): Embedding(256, 1152)
            )
            (encoder): SiglipEncoder(
              (layers): ModuleList(
                (0-26): 27 x SiglipEncoderLayer(
                  (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
                  (self_attn): SiglipAttention(
                    (k_proj): Linear(in_features=1152, out_features=1152, bias=True)
                    (v_proj): Linear(in_features=1152, out_features=1152, bias=True)
                    (q_proj): Linear(in_features=1152, out_features=1152, bias=True)
                    (out_proj): Linear(i

In [5]:

kwargs = {}
preprocessor_overrides = {}
postprocessor_overrides = {}
preprocessor_overrides["groot_pack_inputs_v3"] = {
    "stats": kwargs.get("dataset_stats"),
    "normalize_min_max": True,
}

# Also ensure postprocessing slices to env action dim and unnormalizes with dataset stats
env_action_dim = policy.config.output_features["action"].shape[0]
postprocessor_overrides["groot_action_unpack_unnormalize_v1"] = {
    "stats": kwargs.get("dataset_stats"),
    "normalize_min_max": True,
    "env_action_dim": env_action_dim,
}
kwargs["preprocessor_overrides"] = preprocessor_overrides
kwargs["postprocessor_overrides"] = postprocessor_overrides


preprocessor = PolicyProcessorPipeline.from_pretrained(
    pretrained_model_name_or_path=repo_id_or_path,
    config_filename=f"{POLICY_PREPROCESSOR_DEFAULT_NAME}.json",
    overrides=kwargs.get("preprocessor_overrides", {}),
    to_transition=batch_to_transition,
    to_output=transition_to_batch,
)

postprocessor =  PolicyProcessorPipeline.from_pretrained(
    pretrained_model_name_or_path=repo_id_or_path,
    config_filename=f"{POLICY_POSTPROCESSOR_DEFAULT_NAME}.json",
    overrides=kwargs.get("postprocessor_overrides", {}),
    to_transition=policy_action_to_transition,
    to_output=transition_to_policy_action,
)

## Load Environment

In [6]:
'''
Load environment configuration and initialize environments
'''
# Evaluation Configuration
TEST_EPISODES = 20
MAX_EPISODE_STEPS = 10_000

In [7]:
config_file_path = './configs/train.json'
with open(config_file_path) as f:
    env_conf = json.load(f)
omy_env = RILAB_OMY_ENV(cfg=env_conf, seed=0, 
                        action_type='joint', 
                        obs_type='joint_pos',
                        vis_mode = 'teleop')


-----------------------------------------------------------------------------
name:[tabletop_env] dt:[0.002] HZ:[500]
 n_qpos:[34] n_qvel:[31] n_qacc:[31] n_ctrl:[8]
 integrator:[IMPLICITFAST]

n_body:[25]
 [0/25] [world] mass:[0.00]kg
 [1/25] [front_object_table] mass:[1.00]kg
 [2/25] [camera] mass:[0.00]kg
 [3/25] [camera2] mass:[0.00]kg
 [4/25] [camera3] mass:[0.00]kg
 [5/25] [base] mass:[3.59]kg
 [6/25] [link1] mass:[2.06]kg
 [7/25] [link2] mass:[3.68]kg
 [8/25] [link3] mass:[2.39]kg
 [9/25] [link4] mass:[1.40]kg
 [10/25] [link5] mass:[1.40]kg
 [11/25] [link6] mass:[0.65]kg
 [12/25] [camera_center] mass:[0.00]kg
 [13/25] [tcp_link] mass:[0.32]kg
 [14/25] [rh_p12_rn_r1] mass:[0.07]kg
 [15/25] [rh_p12_rn_r2] mass:[0.02]kg
 [16/25] [rh_p12_rn_l1] mass:[0.07]kg
 [17/25] [rh_p12_rn_l2] mass:[0.02]kg
 [18/25] [body_obj_wooden_cabinet] mass:[2.24]kg
 [19/25] [body_obj_wooden_cabinet_top] mass:[3.00]kg
 [20/25] [body_obj_wooden_cabinet_middle] mass:[3.00]kg
 [21/25] [body_obj_wooden_cabin

In [8]:
def get_default_transform():
    """
    Returns a torchvision transform that:
     Converts to a FloatTensor and scales pixel values [0,255] -> [0.0,1.0]
    """
    return transforms.Compose([
        transforms.ToTensor(),  # PIL [0–255] -> FloatTensor [0.0–1.0], shape C×H×W
    ])
IMG_TRANSFORM = get_default_transform()

## Evaluation

In [9]:
'''
Run one evaluation episode
'''
def run_one_episode():
    omy_env.reset()
    policy.reset()
    observation = omy_env.get_observation()
    omy_env.env.tick = 0
    while omy_env.env.is_viewer_alive() and omy_env.env.tick < MAX_EPISODE_STEPS:
        omy_env.step_env()
        if omy_env.env.loop_every(HZ = 20):
            success = omy_env.check_success()
            if success: break
            if omy_env.env.is_key_pressed_once(glfw.KEY_Z):
                break  # for debugging: press 'z' to end the episode
            agent_image, wrist_image = omy_env.grab_image(return_side=False)
            # # resize to 256x256
            frame = {
                "observation.state": torch.tensor(observation, device=device).unsqueeze(0),
                'task': [env_conf['language_instruction']]
            }
            agent_image = Image.fromarray(agent_image)
            wrist_image = Image.fromarray(wrist_image)
            agent_image = agent_image.resize((256, 256))
            wrist_image = wrist_image.resize((256, 256))
            agent_image = IMG_TRANSFORM(agent_image)
            wrist_image = IMG_TRANSFORM(wrist_image)
            frame["observation.image"] = agent_image
            frame["observation.wrist_image"] = wrist_image
            # pre-process the frame
            frame = preprocessor(frame)
            # select action
            with torch.inference_mode():
                action = policy.select_action(frame)
            # post-process the action
            action = postprocessor(action)
            action = action.squeeze(0).cpu().numpy()
            observation = omy_env.step(action, gripper_mode='continuous')
            omy_env.render()
    return success

In [10]:
'''
Run evaluation over multiple episodes
'''
results = []
for episode in range(TEST_EPISODES):
    success = run_one_episode()
    results.append(success)
    print(f"Episode {episode+1}/{TEST_EPISODES} - Success: {success}")
omy_env.env.close_viewer()
# log average success rate
avg_success = sum(results) / len(results)
print(f"Average Success Rate over {TEST_EPISODES} episodes: {avg_success*100:.2f}%")


`use_fast` is set to `True` but the image processor class does not have a fast version.  Falling back to the slow version.


['top', 'close']
DONE INITIALIZATION


The tokenizer you are loading from '/home/rilab/.cache/huggingface/lerobot/lerobot/eagle2hg-processor-groot-n1p5' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.
The tokenizer you are loading from '/home/rilab/.cache/huggingface/lerobot/lerobot/eagle2hg-processor-groot-n1p5' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


Episode 1/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 2/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 3/20 - Success: False
['top', 'close']
DONE INITIALIZATION
Episode 4/20 - Success: False
['top', 'close']
DONE INITIALIZATION
Episode 5/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 6/20 - Success: False
['top', 'close']
DONE INITIALIZATION
Episode 7/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 8/20 - Success: False
['top', 'close']
DONE INITIALIZATION
Episode 9/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 10/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 11/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 12/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 13/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 14/20 - Success: True
['top', 'close']
DONE INITIALIZATION
Episode 15/20 - Success: True
['top', 'close']
DONE INITIALIZATION
