## Imports

In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
from pprint import pprint

import imageio
import torch

import lerobot
from IPython.display import Video

## Datasets

In [None]:
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

# Uncomment to print `lerobot.available_datasets`:
# print("List of available datasets:")
# pprint(lerobot.available_datasets)

# Hugging Face repository id of the dataset used in the "Datasets" section.
repo_id = "lerobot/aloha_sim_insertion_human"
# You can easily load a dataset from a Hugging Face repository by passing its id.
# NOTE(review): presumably the files are downloaded/cached on first use -- confirm.
# `dataset` is rebound by later cells (with `delta_timestamps`, then with another repository),
# so re-run this cell before re-running the inspection cell that follows it.
dataset = LeRobotDataset(repo_id)

In [17]:
# `LeRobotDataset` is a thin wrapper around an underlying Hugging Face dataset
# (see https://huggingface.co/docs/datasets/index for more information), so we show both reprs.
print(dataset)
print(dataset.hf_dataset)

# On top of that it exposes robotics-oriented metadata and stays compatible with PyTorch.
print(f"\naverage number of frames per episode: {dataset.num_samples / dataset.num_episodes:.3f}")
print(f"frames per second used during data collection: {dataset.fps=}")
print(f"keys to access images from cameras: {dataset.camera_keys=}\n")

# Frame indexes delimiting the first episode (used below as `range(from_idx, to_idx)`).
episode_index = 0
episode_bounds = dataset.episode_data_index
from_idx = episode_bounds["from"][episode_index].item()
to_idx = episode_bounds["to"][episode_index].item()


def _to_uint8_hwc(frame):
    """Turn a float32 channel-first (c,h,w) frame in [0,1] into a uint8 channel-last (h,w,c) numpy array."""
    # Scale to [0,255] and cast first, then move the channel axis last.
    return (frame * 255).type(torch.uint8).permute((1, 2, 0)).numpy()


# LeRobot datasets subclass PyTorch datasets, so they can be indexed like any of them.
# Here we grab every top-camera frame of the episode and convert it for visualization in a single pass.
frames = [_to_uint8_hwc(dataset[idx]["observation.images.top"]) for idx in range(from_idx, to_idx)]

LeRobotDataset(
  Repository ID: 'lerobot/aloha_sim_insertion_human',
  Split: 'train',
  Number of Samples: 25000,
  Number of Episodes: 50,
  Type: video (.mp4),
  Recorded Frames per Second: 50,
  Camera Keys: ['observation.images.top'],
  Video Frame Keys: ['observation.images.top'],
  Transformations: None,
  Codebase Version: v1.6,
)
Dataset({
    features: ['observation.images.top', 'observation.state', 'action', 'episode_index', 'frame_index', 'timestamp', 'next.done', 'index'],
    num_rows: 25000
})

average number of frames per episode: 500.000
frames per second used during data collection: dataset.fps=50
keys to access images from cameras: dataset.camera_keys=['observation.images.top']



In [21]:
from IPython.display import Video
# Finally, we save the frames to an mp4 video for visualization.
# The output directory is relative to the current working directory and is created if missing.
dir_path = "outputs/examples/1_load_lerobot_dataset"
Path(dir_path).mkdir(parents=True, exist_ok=True)
video_path = dir_path + "/episode_0.mp4"
# Encode with the dataset's recording frame rate (`dataset.fps`).
imageio.mimsave(video_path, frames, fps=dataset.fps)
# Last expression of the cell: displays the saved file inline.
Video(video_path)

In [24]:
# For many machine learning applications we need to load the history of past observations or trajectories of
# future actions. Our datasets can load previous and future frames for each key/modality, using timestamp
# differences (in seconds) relative to the currently loaded frame. For instance:
delta_timestamps = {
    # loads 4 images: 1 second before current frame, 500 ms before, 200 ms before, and current frame
    "observation.images.top": [-1, -0.5, -0.20, 0],
    # loads 8 state vectors: 1.5 seconds before, 1 second before, ... 20 ms, 10 ms, and current frame
    "observation.state": [-1.5, -1, -0.5, -0.20, -0.10, -0.02, -0.01, 0],
    # loads 64 action vectors: current frame, 1 frame in the future, 2 frames, ... 63 frames in the future
    # (frame offsets are converted to seconds by dividing by the recording fps)
    "action": [t / dataset.fps for t in range(64)],
}
# Rebinds `dataset`: from here on every item carries a leading time dimension per key.
dataset = LeRobotDataset(repo_id, delta_timestamps=delta_timestamps)
print(f"\n{dataset[0]['observation.images.top'].shape=}")  # (4,c,h,w): 4 timestamps
print(f"{dataset[0]['observation.state'].shape=}")  # (8,state_dim): 8 timestamps
print(f"{dataset[0]['action'].shape=}\n")  # (64,action_dim): 64 timestamps

# Finally, our datasets are fully compatible with PyTorch dataloaders and samplers because they are just
# PyTorch datasets.
dataloader = torch.utils.data.DataLoader(
    dataset,
    num_workers=0,
    batch_size=32,
    shuffle=True,
)
# Only the first batch is inspected; `batch` stays defined for the next cell.
for batch in dataloader:
    print(f"{batch['observation.images.top'].shape=}")  # (32,4,c,h,w)
    print(f"{batch['observation.state'].shape=}")  # (32,8,state_dim)
    print(f"{batch['action'].shape=}")  # (32,64,action_dim)
    break

Fetching 56 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 1539.36it/s]



dataset[0]['observation.images.top'].shape=torch.Size([4, 3, 480, 640])
dataset[0]['observation.state'].shape=torch.Size([8, 14])
dataset[0]['action'].shape=torch.Size([64, 14])

batch['observation.images.top'].shape=torch.Size([32, 4, 3, 480, 640])
batch['observation.state'].shape=torch.Size([32, 8, 14])
batch['action'].shape=torch.Size([32, 64, 14])


In [29]:
# Peek at one state vector: first sample of the batch, first entry of its 8-step state history.
batch['observation.state'][0][0]

tensor([ 0.0281, -0.5264,  0.8721, -0.0157,  0.4200, -0.0215,  0.1536, -0.0547,
        -0.8091,  0.9081,  0.0494,  0.3163,  0.1224, -0.0032])

## Training

In [10]:
from pathlib import Path

import torch

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy

# Create a directory to store the training checkpoint.
output_directory = Path("outputs/train/example_pusht_diffusion")
output_directory.mkdir(parents=True, exist_ok=True)

# Number of offline training steps (we'll only do offline training for this example.)
# Adjust as you prefer. 5000 steps are needed to get something worth evaluating.
training_steps = 5000
# Use the GPU when one is available and fall back to the CPU otherwise, instead of crashing on
# CUDA-less machines (the evaluation section of this notebook selects its device the same way).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cpu":
    print(f"GPU is not available. Device set to: {device}. Training will be much slower than on GPU.")
# Print the loss every `log_freq` optimization steps.
log_freq = 250

# Set up the dataset.
delta_timestamps = {
    # Load the previous image and state at -0.1 seconds before current frame,
    # then load current image and state corresponding to 0.0 second.
    "observation.image": [-0.1, 0.0],
    "observation.state": [-0.1, 0.0],
    # Load the previous action (-0.1), the next action to be executed (0.0),
    # and 14 future actions with a 0.1 seconds spacing. All these actions will be
    # used to supervise the policy.
    "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
}
dataset = LeRobotDataset("lerobot/pusht", delta_timestamps=delta_timestamps)

# Set up the policy.
# Policies are initialized with a configuration class, in this case `DiffusionConfig`.
# For this example, no arguments need to be passed because the defaults are set up for PushT.
# If you're doing something different, you will likely need to change at least some of the defaults.
cfg = DiffusionConfig()
policy = DiffusionPolicy(cfg, dataset_stats=dataset.stats)
policy.train()
policy.to(device)

optimizer = torch.optim.Adam(policy.parameters(), lr=1e-4)

# Create dataloader for offline training.
dataloader = torch.utils.data.DataLoader(
    dataset,
    num_workers=4,
    batch_size=64,
    shuffle=True,
    # Pinned host memory is only requested when batches are copied to a non-CPU device.
    pin_memory=device.type != "cpu",
    drop_last=True,
)

# Run training loop: keep cycling over the dataloader until `training_steps` updates were done.
step = 0
done = False
while not done:
    for batch in dataloader:
        batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}
        output_dict = policy.forward(batch)
        loss = output_dict["loss"]
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if step % log_freq == 0:
            print(f"step: {step} loss: {loss.item():.3f}")
        step += 1
        if step >= training_steps:
            done = True
            break

# Save a policy checkpoint.
policy.save_pretrained(output_directory)

Generating train split: 25650 examples [00:00, 381978.62 examples/s]
Fetching 212 files: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 212/212 [00:07<00:00, 28.90it/s]


step: 0 loss: 1.193
step: 250 loss: 0.067
step: 500 loss: 0.054
step: 750 loss: 0.056
step: 1000 loss: 0.049
step: 1250 loss: 0.057
step: 1500 loss: 0.044
step: 1750 loss: 0.048
step: 2000 loss: 0.050
step: 2250 loss: 0.068
step: 2500 loss: 0.045
step: 2750 loss: 0.025
step: 3000 loss: 0.045
step: 3250 loss: 0.025
step: 3500 loss: 0.040
step: 3750 loss: 0.041
step: 4000 loss: 0.052
step: 4250 loss: 0.034
step: 4500 loss: 0.028
step: 4750 loss: 0.040


## Evaluation

In [3]:
from pathlib import Path

import gym_pusht  # noqa: F401
import gymnasium as gym
import imageio
import numpy
import torch
from huggingface_hub import snapshot_download

from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.common.policies.act.modeling_act import ACTPolicy



# Download the diffusion policy for pusht environment
# pretrained_policy_path = Path(snapshot_download("lerobot/diffusion_pusht"))
# OR uncomment the following to evaluate a policy from the local outputs/train folder.

# Create a directory to store the video of the evaluation
# output_directory = Path("outputs/eval/example_pusht_diffusion")
# output_directory.mkdir(parents=True, exist_ok=True)
# pretrained_policy_path = Path("outputs/train/example_pusht_diffusion")
# policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)

# Name of the training run to evaluate and the checkpoint step to load from it.
TASK_NAME = "lerobot_base_distributed_aloha_transfer_cube_1gpus"
step = 50000
# Create a directory to store the video of the evaluation
output_directory = Path(f"outputs/eval/{TASK_NAME}")
output_directory.mkdir(parents=True, exist_ok=True)
# NOTE(review): machine-specific absolute path -- point it at your own checkpoint directory.
policy_path = f"/data/mshukor/logs/lerobot/{TASK_NAME}/checkpoints/{step:06d}/pretrained_model"
pretrained_policy_path = Path(policy_path)
policy = ACTPolicy.from_pretrained(pretrained_policy_path)


# Switch the policy to evaluation mode before running inference.
policy.eval()

# Check if GPU is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available. Device set to:", device)
else:
    device = torch.device("cpu")
    print(f"GPU is not available. Device set to: {device}. Inference will be slower than on GPU.")
    # Decrease the number of reverse-diffusion steps (trades off a bit of quality for 10x speed).
    # This knob only exists on diffusion policies, so it is guarded: other policy classes (such as
    # the ACT policy loaded above) would otherwise fail here with an AttributeError.
    if hasattr(policy, "diffusion"):
        policy.diffusion.num_inference_steps = 10

policy.to(device)
print(device)

  from .autonotebook import tqdm as notebook_tqdm


Loading weights from local directory
GPU is available. Device set to: cuda
cuda


In [5]:
import gym_aloha 
import gym_xarm 
# Initialize evaluation environment to render two observation types:
# an image of the scene and state/position of the agent. The environment
# also automatically stops running after 300 interactions/steps.
# Alternative environments are kept below; uncomment exactly one `gym.make` call.
# env = gym.make(
#     "gym_pusht/PushT-v0",
#     obs_type="pixels_agent_pos",
#     max_episode_steps=300,
# )

# env = gym.make(
#     "gym_aloha/AlohaTransferCube-v0",
#     obs_type="pixels_agent_pos",
#     max_episode_steps=300,
# )

# NOTE(review): the checkpoint loaded earlier in this section is named after an ALOHA
# transfer-cube run while this environment is xArm lift -- confirm the pairing is intended.
env = gym.make(
    "gym_xarm/XarmLift-v0",
    obs_type="pixels_agent_pos",
    max_episode_steps=300,
)

# Reset the policy and environment to prepare for rollout
# (the fixed seed is passed to `env.reset`, presumably to make the initial state reproducible)
policy.reset()
numpy_observation, info = env.reset(seed=42)

In [4]:
# Prepare to collect all the rewards and all the frames of the episode,
# from initial state to final state.
rewards = []
frames = []

# Render frame of the initial state
frames.append(env.render())

step = 0
done = False
while not done:
    # Prepare observation for the policy running in Pytorch
    state = torch.from_numpy(numpy_observation["agent_pos"])
    image = torch.from_numpy(numpy_observation["pixels"])

    # Convert to float32, with the image going from channel last (h,w,c) in [0,255]
    # to channel first (c,h,w) in [0,1]
    state = state.to(torch.float32)
    image = image.to(torch.float32) / 255
    image = image.permute(2, 0, 1)

    # Send data tensors from CPU to GPU
    state = state.to(device, non_blocking=True)
    image = image.to(device, non_blocking=True)

    # Add extra (empty) batch dimension, required to forward the policy
    state = state.unsqueeze(0)
    image = image.unsqueeze(0)

    # Create the policy input dictionary
    # NOTE(review): these keys must match the input features the loaded policy expects -- confirm.
    observation = {
        "observation.state": state,
        "observation.image": image,
    }

    # Predict the next action with respect to the current observation
    with torch.inference_mode():
        action = policy.select_action(observation)

    # Prepare the action for the environment
    numpy_action = action.squeeze(0).to("cpu").numpy()

    # Step through the environment and receive a new observation
    numpy_observation, reward, terminated, truncated, info = env.step(numpy_action)
    print(f"{step=} {reward=} {terminated=}")

    # Keep track of all the rewards and frames
    rewards.append(reward)
    frames.append(env.render())

    # The rollout is considered done when the success state is reached (i.e. terminated is True),
    # or the maximum number of iterations is reached (i.e. truncated is True).
    # `done` is always False at this point, so it does not need to be folded into the result.
    done = terminated or truncated
    step += 1

if terminated:
    print("Success!")
else:
    print("Failure!")

# Get the speed of environment (i.e. its number of frames per second).
fps = env.metadata["render_fps"]

# Encode all frames into a mp4 video.
video_path = output_directory / "rollout.mp4"
imageio.mimsave(str(video_path), numpy.stack(frames), fps=fps)

print(f"Video of the evaluation is available in '{video_path}'.")

step=0 reward=0.0 terminated=False
step=1 reward=0.0 terminated=False
step=2 reward=0.0 terminated=False
step=3 reward=0.0 terminated=False
step=4 reward=0.0 terminated=False
step=5 reward=0.0 terminated=False
step=6 reward=0.0 terminated=False
step=7 reward=0.0 terminated=False
step=8 reward=0.0 terminated=False
step=9 reward=0.0 terminated=False
step=10 reward=0.0 terminated=False
step=11 reward=0.0 terminated=False
step=12 reward=0.0 terminated=False
step=13 reward=0.0 terminated=False
step=14 reward=0.0 terminated=False
step=15 reward=0.0 terminated=False
step=16 reward=0.0 terminated=False
step=17 reward=0.0 terminated=False
step=18 reward=0.0 terminated=False
step=19 reward=0.0 terminated=False
step=20 reward=0.0 terminated=False
step=21 reward=0.0 terminated=False
step=22 reward=0.0 terminated=False
step=23 reward=0.0 terminated=False
step=24 reward=0.0 terminated=False
step=25 reward=0.0 terminated=False
step=26 reward=0.0 terminated=False
step=27 reward=0.0 terminated=False
st



Video of the evaluation is available in 'outputs/eval/example_pusht_diffusion/rollout.mp4'.


NameError: name 'Video' is not defined

In [6]:
# `Video` is imported in this cell so that the evaluation section also works on a fresh kernel;
# without it this cell fails with "NameError: name 'Video' is not defined".
from IPython.display import Video

# `video_path` is a `pathlib.Path` here (built from `output_directory`); pass a plain string.
Video(str(video_path))

In [9]:
# Shape of the last image tensor fed to the policy: (batch, channels, height, width).
observation['observation.image'].shape

torch.Size([1, 3, 96, 96])