In [20]:
from pathlib import Path
from pprint import pprint

import lerobot
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.factory import make_dataset


from hydra import compose, initialize
from omegaconf import OmegaConf

# context initialization
with initialize(version_base=None, config_path="../configs", job_name="test_app"):
    cfg = compose(config_name="default")
    print(OmegaConf.to_yaml(cfg))

device: cuda
use_amp: false
seed: 100000
dataset_repo_id: lerobot/pusht
training:
  offline_steps: 200000
  online_steps: 0
  online_steps_between_rollouts: 1
  online_sampling_ratio: 0.5
  online_env_seed: ???
  eval_freq: 5000
  save_freq: 5000
  log_freq: 250
  save_model: true
  batch_size: 64
  grad_clip_norm: 10
  lr: 0.0001
  lr_scheduler: cosine
  lr_warmup_steps: 500
  adam_betas:
  - 0.95
  - 0.999
  adam_eps: 1.0e-08
  adam_weight_decay: 1.0e-06
  delta_timestamps:
    observation.image: '[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]'
    observation.state: '[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]'
    action: '[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps}
      + ${policy.horizon})]'
eval:
  n_episodes: 50
  batch_size: 50
  use_async_envs: false
wandb:
  enable: true
  disable_artifact: false
  project: lerobot
  notes: ''
tensorboard:
  enable: true
fps: 10
env:
  name: pusht
  task: PushT-v0
  from_pixels: t

In [65]:
# get the path to the dataset
import pandas as pd
import numpy as np
from pathlib import Path
base_path = Path("~/workspace/lerobot/local/pinpad/original").expanduser()
out_dir = Path("~/workspace/lerobot/local/pinpad").expanduser()

# list all the files in the dataset
files = list(base_path.glob("*"))
# for f in files:
#     print(f)

# print the keys
data = np.load(files[0])
# convert to a dictionary NOTE: this is necessary to make the arrays writeable for some reason
data = dict(data)
for k,v in data.items():
    print(k, v.shape)

print("Setting last is_terminal to true")
data["is_terminal"][-1] = True

image (1001, 64, 64, 3)
reward (1001,)
is_first (1001,)
is_last (1001,)
is_terminal (1001,)
discount (1001,)
action (1001, 5)
logprob (1001,)
Setting last is_terminal to true


In [73]:
import tqdm
import torch
import einops
import shutil
from PIL import Image as PILImage

from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, save_images_concurrently
from lerobot.common.datasets.push_dataset_to_hub.compute_stats import compute_stats
from lerobot.scripts.push_dataset_to_hub import save_meta_data
from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames
from lerobot.common.datasets.utils import hf_transform_to_torch
from datasets import Dataset, Features, Image, Sequence, Value

def to_hf_dataset(data_dict, video):
    features = {}

    if video:
        features["observation.image"] = VideoFrame()
    else:
        features["observation.image"] = Image()

    features["observation.state"] = Sequence(
        length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
    )
    features["action"] = Sequence(
        length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)
    )
    features["episode_index"] = Value(dtype="int64", id=None)
    features["frame_index"] = Value(dtype="int64", id=None)
    features["timestamp"] = Value(dtype="float32", id=None)
    features["next.reward"] = Value(dtype="float32", id=None)
    features["next.done"] = Value(dtype="bool", id=None)
    features["index"] = Value(dtype="int64", id=None)
    # TODO(rcadene): add success
    # features["next.success"] = Value(dtype='bool', id=None)

    hf_dataset = Dataset.from_dict(data_dict, features=Features(features))
    hf_dataset.set_transform(hf_transform_to_torch)
    return hf_dataset

video = False; fps = 15
debug = False


ep_dicts = []
episode_data_index = {"from": [], "to": []}

id_from = 0
id_to = 0
ep_idx = 0
total_frames = data["action"].shape[0]
for i in tqdm.tqdm(range(total_frames)):
    id_to += 1

    if not data["is_terminal"][i]:
        continue
    print("found terminal step")

    num_frames = id_to - id_from

    image = torch.tensor(data["image"][id_from:id_to])
    # image = einops.rearrange(image, "b h w c -> b h w c")
    # image = einops.rearrange(image, "b c h w -> b h w c")
    state = torch.zeros(num_frames, 1)
    action = torch.tensor(data["action"][id_from:id_to])
    # TODO(rcadene): we have a missing last frame which is the observation when the env is done
    # it is critical to have this frame for tdmpc to predict a "done observation/state"
    # next_image = torch.tensor(data["next_observations"]["rgb"][id_from:id_to])
    # next_state = torch.tensor(data["next_observations"]["state"][id_from:id_to])
    next_reward = torch.tensor(data["reward"][id_from:id_to])
    next_done = torch.tensor(data["is_terminal"][id_from:id_to])

    ep_dict = {}

    imgs_array = [x.numpy() for x in image]
    img_key = "observation.image"
    if video:
        # save png images in temporary directory
        tmp_imgs_dir = out_dir / "tmp_images"
        save_images_concurrently(imgs_array, tmp_imgs_dir)

        # encode images to a mp4 video
        fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
        video_path = out_dir / "videos" / fname
        encode_video_frames(tmp_imgs_dir, video_path, fps)

        # clean temporary images directory
        shutil.rmtree(tmp_imgs_dir)

        # store the reference to the video frame
        ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames)]
    else:
        pass
        # ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array]
        # ep_dict[img_key] = [Image.fromarray(x) for x in imgs_array]

    ep_dict["observation.state"] = state
    ep_dict["action"] = action
    ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
    ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
    ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps
    # ep_dict["next.observation.image"] = next_image
    # ep_dict["next.observation.state"] = next_state
    ep_dict["next.reward"] = next_reward
    ep_dict["next.done"] = next_done
    ep_dicts.append(ep_dict)

    episode_data_index["from"].append(id_from)
    episode_data_index["to"].append(id_from + num_frames)

    id_from = id_to
    ep_idx += 1

    # process first episode only
    if debug:
        break
if len(ep_dicts) == 0:
    print("No terminal step found in the dataset")
else:
    data_dict = concatenate_episodes(ep_dicts)
    data_dict, episode_data_index

    hf_dataset = to_hf_dataset(data_dict, video)
    info = {"fps": fps, "video": video}

    hf_dataset = hf_dataset.with_format(None)  # to remove transforms that cant be saved

    lerobot_dataset = LeRobotDataset.from_preloaded(
    repo_id="pinpad",
    version=0.0,
    hf_dataset=hf_dataset,
    episode_data_index=episode_data_index,
    info=info,
    videos_dir=video_path,
    )
    stats = compute_stats(lerobot_dataset, 64, 1)

    hf_dataset.save_to_disk(str(out_dir / "train"))
    save_meta_data(info, stats, episode_data_index, str(out_dir))

100%|██████████| 1001/1001 [00:00<00:00, 77599.08it/s]


found terminal step


ValueError: Keys mismatch: between {'observation.image': Image(mode=None, decode=True, id=None), 'observation.state': Sequence(feature=Value(dtype='float32', id=None), length=1, id=None), 'action': Sequence(feature=Value(dtype='float32', id=None), length=5, id=None), 'episode_index': Value(dtype='int64', id=None), 'frame_index': Value(dtype='int64', id=None), 'timestamp': Value(dtype='float32', id=None), 'next.reward': Value(dtype='float32', id=None), 'next.done': Value(dtype='bool', id=None), 'index': Value(dtype='int64', id=None)} (source) and {'observation.state': Sequence(feature=Value(dtype='float32', id=None), length=1, id=None), 'action': Sequence(feature=Value(dtype='float32', id=None), length=5, id=None), 'episode_index': Value(dtype='int64', id=None), 'frame_index': Value(dtype='int64', id=None), 'timestamp': Value(dtype='float32', id=None), 'next.reward': Value(dtype='float32', id=None), 'next.done': Value(dtype='bool', id=None), 'index': Value(dtype='int64', id=None)} (target).
{'observation.image'} are missing from target and set() are missing from source
The 'source' features come from dataset_info.json, and the 'target' ones are those of the dataset arrow file.

In [None]:

# write the dictionary to disk as a json file

In [None]:

from datasets import load_dataset, load_from_disk
load_from_disk()