In [6]:
"""
Convert from robomimic hdf5 to tensordict format
"""

from pathlib import Path

import h5py
import numpy as np
import torch
from tensordict import TensorDict

# --- Configuration ---
save_dataset = True
data_path = "/scr/shared/datasets/robot_pref/stack_panda/stack_panda.hdf5"

# Low-dimensional observation entries, concatenated in this order along axis 1.
# The contents of "object" vary by task.
obs_keys = ["robot0_eef_pos", "robot0_eef_quat", "robot0_gripper_qpos", "object"]


def _demo_index(demo_name):
    """Return the integer that follows the first underscore in a demo key (e.g. ``demo_12`` -> 12)."""
    return int(demo_name.split('_')[1])


# --- Read every demo into memory ---
actions = []
episodes = []
images = []
obs = []

with h5py.File(data_path, 'r') as f:
    data = f["data"]

    # Sort numerically so that demo_10 comes after demo_9 rather than after demo_1.
    for demo in sorted(data.keys(), key=_demo_index):
        demo_data = data[demo]
        demo_obs = demo_data["obs"]
        demo_len = len(demo_data["actions"])

        # ``[:]`` copies each dataset out of the file, so nothing below
        # depends on the HDF5 handle staying open.
        demo_actions = demo_data["actions"][:]
        demo_images = demo_obs["agentview_image"][:]
        demo_obs_vec = np.concatenate([demo_obs[key][:] for key in obs_keys], axis=1)

        # All per-step fields must line up, otherwise the flat tensors built
        # below would pair actions with the wrong frames.
        if len(demo_images) != demo_len or len(demo_obs_vec) != demo_len:
            raise ValueError(
                f"{demo}: {demo_len} actions but {len(demo_images)} images "
                f"and {len(demo_obs_vec)} observation rows"
            )

        actions.append(torch.from_numpy(demo_actions).float())
        # One episode id per step, taken from the demo key.
        episodes.append(torch.full((demo_len,), _demo_index(demo), dtype=torch.long))
        images.append(torch.from_numpy(demo_images))
        obs.append(torch.from_numpy(demo_obs_vec).float())

        # Rewards are not exported. To add them, collect
        # demo_data["rewards"][:] here and add a "reward" field below.

if not actions:
    raise ValueError(f"no demos found under 'data' in {data_path}")

# --- Concatenate all demos into one flat TensorDict ---
tensordict = TensorDict({
    "action": torch.cat(actions),
    "episode": torch.cat(episodes),
    "image": torch.cat(images),
    "obs": torch.cat(obs),
}, batch_size=torch.Size([]))

print(tensordict)

if save_dataset:
    # Save next to the source file, swapping only the final extension for .pt.
    source_path = Path(data_path)
    save_path = source_path.with_suffix(".pt")
    if save_path == source_path:
        # Never write the converted tensors over the dataset they came from.
        raise ValueError(f"refusing to overwrite the source dataset at {source_path}")
    print(f"saving to {save_path}")
    torch.save(tensordict, str(save_path))


TensorDict(
    fields={
        action: Tensor(shape=torch.Size([54866, 7]), device=cpu, dtype=torch.float32, is_shared=False),
        episode: Tensor(shape=torch.Size([54866]), device=cpu, dtype=torch.int64, is_shared=False),
        image: Tensor(shape=torch.Size([54866, 84, 84, 3]), device=cpu, dtype=torch.uint8, is_shared=False),
        obs: Tensor(shape=torch.Size([54866, 32]), device=cpu, dtype=torch.float32, is_shared=False)},
    batch_size=torch.Size([]),
    device=None,
    is_shared=False)
saving to /scr/shared/datasets/robot_pref/stack_panda/stack_panda.pt


In [8]:
import h5py
import numpy as np

def print_stats(arr, name):
    """Print the shape and the axis-0 mean, min and max of ``arr``.

    Args:
        arr: Array-like input; converted with ``np.asarray``.
        name: Label printed on the first line of the report.

    An input with zero rows has no defined min/max (``np.min`` raises on it),
    so in that case only the label and shape are printed, followed by a note.
    """
    arr = np.asarray(arr)
    print(f"{name}:")
    print(f"  shape: {arr.shape}")
    if arr.ndim > 0 and arr.shape[0] == 0:
        # Reducing over an empty leading axis would raise in np.min / np.max.
        print("  (no rows; statistics undefined)")
        print()
        return
    print(f"  mean: {np.mean(arr, axis=0)}")
    print(f"  min: {np.min(arr, axis=0)}")
    print(f"  max: {np.max(arr, axis=0)}")
    print()

# Datasets whose action distributions are compared.
path = "/scr/shared/datasets/robot_pref/lift_panda/lift_panda.hdf5"
sawyer_path = "/scr/shared/datasets/robot_pref/lift_sawyer/lift_sawyer.hdf5"

for p in (path, sawyer_path):
    print(f"==== {p} ====")
    with h5py.File(p, 'r') as f:
        demos = f["data"]
        # Read the full action array of every demo in this file.
        per_demo_actions = [demos[name]["actions"][:] for name in demos.keys()]
        if not per_demo_actions:
            print("No demos found.")
            continue
        # Stack all demos along the step axis and report aggregate statistics.
        all_actions = np.concatenate(per_demo_actions, axis=0)
        print_stats(all_actions, "actions")


==== /scr/shared/datasets/robot_pref/lift_panda/lift_panda.hdf5 ====
actions:
  shape: (68148, 7)
  mean: [ 0.03534264  0.00816505 -0.03161107 -0.00074413  0.00165334 -0.00505467
 -0.65613077]
  min: [-1. -1. -1. -1. -1. -1. -1.]
  max: [1. 1. 1. 1. 1. 1. 1.]

==== /scr/shared/datasets/robot_pref/lift_sawyer/lift_sawyer.hdf5 ====
actions:
  shape: (66282, 7)
  mean: [ 0.0294056  -0.04304439 -0.04729058  0.00527456 -0.00422478 -0.01669778
 -0.65001056]
  min: [-1.         -1.         -1.         -0.44683041 -1.         -1.
 -1.        ]
  max: [1.         1.         1.         1.         0.35533006 1.
 1.        ]



In [3]:
import h5py
import numpy as np

data_path = "/scr/shared/datasets/robot_pref/lift_panda/lift_panda.hdf5"
other_data_path = "/tmp/medium_random/lift/demo_src_lift_task_Lift_r_Panda/demo_failed.hdf5"
combined_path = "/tmp/combined/lift/demo_src_lift_task_Lift_r_Panda/demo_failed.hdf5"

# Only inspection is active in this cell, so both files are opened read-only.
# Mode 'a' would need write access to the shared dataset and would create an
# empty file if data_path did not exist.
with h5py.File(data_path, 'r') as f, h5py.File(other_data_path, 'r') as f2:
    data = f["data"]
    data2 = f2["data"]

    # Prints the entries stored inside the "demo_1" group of data_path.
    print((data["demo_1"].keys()), "existing demos")

    # Disabled merge step: copies every demo of other_data_path into data_path
    # under fresh demo_<n> keys. Re-enabling it requires opening data_path
    # with mode 'a' instead of 'r'.
    #
    # # Get current max index in data
    # existing_keys = list(data.keys())
    # max_index = max(int(k.split('_')[-1]) for k in existing_keys)

    # for k in data2.keys():
    #     # Compute new index
    #     new_index = max_index + 1
    #     new_key = f"demo_{new_index}"

    #     # Copy group or dataset
    #     f.copy(data2[k], data, name=new_key)

    #     # Increment for next copy
    #     max_index += 1


<KeysViewHDF5 ['actions', 'datagen_info', 'obs', 'src_demo_inds', 'src_demo_labels', 'states']> existing demos
