# Pi0 Trial

## Introduction

We use the ALOHA simulation environment to try out the following approaches:

1. Use the Hugging Face version (i.e., PyTorch version) of `pi0_base` (general-purpose model)
2. Fine-tune the Hugging Face version of `pi0_base`, then use it

### Useful Links

- Blog post on porting the model to LeRobot:
  - https://huggingface.co/blog/pi0
- Visualization of the transfer cube task:
  - https://lerobot-visualize-dataset.hf.space/lerobot/aloha_sim_transfer_cube_human
- Scripts to convert the model from JAX to PyTorch:
  - https://github.com/huggingface/lerobot/blob/main/lerobot/common/policies/pi0/conversion_scripts
- Training configuration of `pi0_aloha_sim`:
  - [https://github.com/Physical-Intelligence/openpi/blob/main/src/openpi/training/config.py#L616-L627](https://github.com/Physical-Intelligence/openpi/blob/581e07d73af36d336cef1ec9d7172553b2332193/src/openpi/training/config.py#L616-L627)

## Preparation

In [1]:
import itertools
import os
import pprint  # noqa: F401
import sys

from pathlib import Path

import torch

from IPython.display import Video
from huggingface_hub import whoami, interpreter_login
from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.common.envs.factory import make_env_config, make_env
from lerobot.common.envs.utils import preprocess_observation
from lerobot.common.policies.factory import make_policy_config, make_policy
from lerobot.common.utils.io_utils import write_video
from lerobot.common.utils.utils import init_logging
from lerobot.configs import parser
from lerobot.configs.train import TrainPipelineConfig
from lerobot.scripts.train import train  # noqa: F401
from torch import Tensor

In [2]:
# Set the TOKENIZERS_PARALLELISM environment variable to "false" before any
# tokenizer is used (presumably to avoid the problem discussed in the issue below).
# Refer to https://github.com/huggingface/transformers/issues/5486
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [3]:
# We need to acknowledge PaliGemma's license and log in to Hugging Face.
# To acknowledge, visit https://huggingface.co/google/paligemma-3b-pt-224

# Probe the current credentials. Any failure of `whoami()` or an empty result is
# treated as "not logged in" (the exact exception type raised without a valid
# token is not pinned here — TODO confirm). An explicit check is used instead
# of `assert`, because assert statements are removed entirely under `python -O`.
try:
    logged_in = bool(whoami())
except Exception:
    logged_in = False

# Fall back to the interactive login prompt only when needed.
if not logged_in:
    interpreter_login()

In [4]:
# Set up logging through LeRobot's `init_logging` helper.
init_logging()

## Use the Hugging Face Version of `pi0_base`

In [5]:
# Rollout artifacts (e.g. the rendered video) are written under this directory;
# it is created up front, together with any missing parents.
output_dir = Path("outputs") / "eval" / "example_aloha_pi0"
output_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# Metadata of the dataset the policy is instantiated against.
ds_meta = LeRobotDatasetMetadata("lerobot/aloha_sim_transfer_cube_human")

policy_cfg = make_policy_config(
    "pi0",
    device="mps",
    # The ALOHA-specific options below follow the "pi0_aloha_sim" setting in
    # convert_pi0_to_hf_lerobot.py.
    empty_cameras=2,
    adapt_to_pi_aloha=True,
    use_delta_joint_actions_aloha=False,
)
# Weights come from the pretrained model published on Hugging Face.
policy_cfg.pretrained_path = "lerobot/pi0"

policy = make_policy(policy_cfg, ds_meta=ds_meta)

In [7]:
# Simulated ALOHA cube-transfer task. An `episode_length=100` argument can also
# be passed here (left disabled) — presumably to cap the rollout length.
env_cfg = make_env_config("aloha", task="AlohaTransferCube-v0")

env = make_env(env_cfg)  # batch size 1

INFO 2025-03-18 18:03:27 /__init__.py:88 MUJOCO_GL is not set, so an OpenGL backend will be chosen automatically.
INFO 2025-03-18 18:03:28 /__init__.py:96 Successfully imported OpenGL backend: %s
INFO 2025-03-18 18:03:28 /__init__.py:31 MuJoCo library version is: %s


In [8]:
# Sanity check: the policy's expected inputs should line up with what the
# environment observes. Features whose name contains "empty_camera" are left
# out of the comparison.
input_features_without_empty_camera = {
    name: feature
    for name, feature in policy.config.input_features.items()
    if "empty_camera" not in name
}
print(input_features_without_empty_camera)  # channel first
print(env.observation_space)  # channel last

{'observation.images.top': PolicyFeature(type=<FeatureType.VISUAL: 'VISUAL'>, shape=(3, 480, 640)), 'observation.state': PolicyFeature(type=<FeatureType.STATE: 'STATE'>, shape=(14,))}
Dict('agent_pos': Box(-1000.0, 1000.0, (1, 14), float64), 'pixels': Dict('top': Box(0, 255, (1, 480, 640, 3), uint8)))


In [9]:
# Verify compatibility of the output shape between the policy and the environment.
# The policy's action feature is 14-dimensional; the environment's action space
# carries an additional leading batch dimension of 1.
print(policy.config.output_features)
print(env.action_space)

{'action': PolicyFeature(type=<FeatureType.ACTION: 'ACTION'>, shape=(14,))}
Box(-1.0, 1.0, (1, 14), float32)


In [10]:
# Show the first task string stored in the dataset metadata; it is the text
# passed to the policy as the "task" entry of each observation.
ds_meta.tasks[0]  # language instruction

'Pick up the cube with the right arm and transfer it to the left arm.'

In [11]:
def convert_env_observation_to_policy_observation(
    observation: dict[str, Tensor],
    device: str | torch.device | None = None,
) -> dict[str, Tensor | list[str]]:
    """Turn an environment observation into the batch passed to the policy.

    The observation is run through `preprocess_observation`, every resulting
    tensor is moved to the target device, and the language instruction
    (`ds_meta.tasks[0]`) is attached under the "task" key as a one-element
    list (batch size 1).

    NOTE(review): `observation` is annotated as a dict of tensors, but the
    environment presumably yields NumPy arrays — confirm and adjust the
    annotation if so.

    Args:
        observation: Observation as returned by `env.reset()` / `env.step()`.
        device: Device to move the tensors to. When omitted, the device
            recorded on `policy_cfg` is used, so the inputs follow the policy
            configuration instead of a hard-coded device string.

    Returns:
        The preprocessed tensors on the target device plus the "task" entry.
    """
    target_device = policy_cfg.device if device is None else device
    preprocessed = preprocess_observation(observation)
    converted = {k: v.to(target_device) for k, v in preprocessed.items()}
    converted["task"] = [ds_meta.tasks[0]]  # batch size 1
    return converted

In [12]:
# Start the rollout from a clean state: reset the policy and reset the
# environment with a fixed seed.
policy.reset()
observation, _info = env.reset(seed=42)

frames = [env.envs[0].render()]  # the video starts with the initial frame
rewards = []

# NOTE(review): only `truncated` ends the loop; `_terminated` is ignored, so the
# rollout keeps stepping after the environment reports termination — confirm
# that this is intended.
step = 0
while True:
    step += 1

    policy_input = convert_env_observation_to_policy_observation(observation)

    # Query the policy for the next action with autograd tracking disabled.
    with torch.inference_mode():
        action = policy.select_action(policy_input)

    # Hand the action to the environment as a NumPy array on the CPU.
    action = action.cpu().numpy()

    # Advance the simulation by one step; reward and truncation flag are
    # unpacked from their single-element batches.
    observation, [reward], _terminated, [truncated], _info = env.step(action)
    print(f"{step=} {reward=:f}")

    # Record the reward and the frame rendered after this step.
    rewards.append(reward)
    frames.append(env.envs[0].render())

    if truncated:
        break

step=1 reward=0.000000
step=2 reward=0.000000
step=3 reward=0.000000
step=4 reward=0.000000
step=5 reward=4.000000
step=6 reward=0.000000
step=7 reward=0.000000
step=8 reward=0.000000
step=9 reward=0.000000
step=10 reward=0.000000
step=11 reward=0.000000
step=12 reward=0.000000
step=13 reward=0.000000
step=14 reward=4.000000
step=15 reward=0.000000
step=16 reward=0.000000
step=17 reward=0.000000
step=18 reward=0.000000
step=19 reward=0.000000
step=20 reward=0.000000
step=21 reward=0.000000
step=22 reward=0.000000
step=23 reward=0.000000
step=24 reward=0.000000
step=25 reward=0.000000
step=26 reward=0.000000
step=27 reward=0.000000
step=28 reward=0.000000
step=29 reward=0.000000
step=30 reward=0.000000
step=31 reward=0.000000
step=32 reward=0.000000
step=33 reward=0.000000
step=34 reward=0.000000
step=35 reward=0.000000
step=36 reward=0.000000
step=37 reward=0.000000
step=38 reward=0.000000
step=39 reward=0.000000
step=40 reward=0.000000
step=41 reward=0.000000
step=42 reward=0.000000
s

In [13]:
# Encode the collected frames into a video at the environment's render frame rate.
video_path = output_dir / "rollout.mp4"
render_fps = env.metadata["render_fps"]
write_video(video_path, frames, render_fps)

In [14]:
# Leaving the `Video` object as the cell's last expression displays the rollout inline.
Video(video_path)

In [15]:
# Drop the notebook's reference to the evaluated policy before the fine-tuning
# section (presumably so its memory can be reclaimed — confirm).
del policy

## Fine-Tune the Hugging Face Version of `pi0_base`

In [16]:
# HACK: The config object is populated from `sys.argv`, so the command line is
# overwritten with the desired options before the wrapped function is called.
@parser.wrap()
def load_config(config: TrainPipelineConfig):
    """Return the training configuration handed in by `parser.wrap()`."""
    return config


# Keep the program name (first entry) and replace everything after it.
sys.argv = [
    *sys.argv[:1],
    "--policy.path=lerobot/pi0",
    "--policy.device=mps",
    "--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human",
    "--env.type=aloha",
    "--env.task=AlohaTransferCube-v0",
    "--batch_size=4",  # 8 (default) seems to be too large for 32GB M3
    "--steps=2000",
    "--log_freq=100",
    "--wandb.enable=true",  # `uv run wandb login` is required
]

config = load_config()

In [17]:
# Pretty-print the parsed training configuration for inspection.
pprint.pp(config)
# Uncomment the next line to run fine-tuning with this configuration.
# train(config)

TrainPipelineConfig(dataset=DatasetConfig(repo_id='lerobot/aloha_sim_transfer_cube_human',
                                          root=None,
                                          episodes=None,
                                          image_transforms=ImageTransformsConfig(enable=False,
                                                                                 max_num_transforms=3,
                                                                                 random_order=False,
                                                                                 tfs={'brightness': ImageTransformConfig(weight=1.0,
                                                                                                                         type='ColorJitter',
                                                                                                                         kwargs={'brightness': (0.8,
                                                                             