In [28]:
import zarr
from pathlib import Path
from tqdm import trange

from src.dataset.dataset import DaggerDataset

In [29]:
# Source and destination share the same dataset-relative path; only the storage
# root differs. Keeping the relative part in one place guarantees that both
# paths always refer to the same dataset when either is edited.
INPUT_ROOT = Path("/data/scratch/ankile")
OUTPUT_ROOT = Path("/scratch")
DATASET_RELPATH = Path("furniture-data/processed/diffik/sim/one_leg/teleop/low/success.zarr")

inp_path = INPUT_ROOT / DATASET_RELPATH
out_path = OUTPUT_ROOT / DATASET_RELPATH

In [30]:
# Open the input store read-only and create a fresh output store.
# mode='r' makes a wrong `inp_path` fail right here (the default mode 'a' would
# silently create an empty store at that location) and protects the source data
# from accidental writes.
source = zarr.open(inp_path, mode='r')
# mode='w' creates the store, overwriting anything already present at `out_path`.
target = zarr.open(out_path, mode='w')

In [31]:
# Copy the low-dimensional arrays unchanged, keeping each source array's chunking.
# Note that `action/pos` from the source is stored under the flat key `action`.
for dst_key, src_key in (('robot_state', 'robot_state'), ('action', 'action/pos')):
    src_arr = source[src_key]
    target.create_dataset(dst_key, data=src_arr, chunks=src_arr.chunks)

# Kept as the trailing expression so the notebook still displays the created array.
episode_ends_src = source['episode_ends']
target.create_dataset('episode_ends', data=episode_ends_src, chunks=episode_ends_src.chunks)

<zarr.core.Array '/episode_ends' (25,) uint32>

In [32]:
# Re-chunk `color_image1` and `color_image2`: the target arrays use a chunk length of 1
# along the first axis, and the data is transferred one source chunk at a time.
for key in ['color_image1', 'color_image2']:
    source_array = source[key]
    n_frames = source_array.shape[0]
    src_chunk_len = source_array.chunks[0]

    # Same shape and dtype as the source; only the chunking along axis 0 changes.
    target_array = target.create_dataset(
        key,
        shape=source_array.shape,
        chunks=(1, *source_array.shape[1:]),
        dtype=source_array.dtype,
    )

    # Step through the first axis in strides of one source chunk; the last slice may be
    # shorter than a full chunk, which slicing handles on both sides.
    for start in trange(0, n_frames, src_chunk_len):
        stop = start + src_chunk_len
        target_array[start:stop] = source_array[start:stop]

100%|██████████| 12/12 [00:03<00:00,  3.28it/s]
100%|██████████| 12/12 [00:05<00:00,  2.23it/s]


In [33]:
# Sanity check: list the member names now present in the output store.
[*target.keys()]

['action', 'color_image1', 'color_image2', 'episode_ends', 'robot_state']

In [18]:
from src.dataset.normalizer import LinearNormalizer


# Fit the normalizer on the complete low-dimensional arrays of the *source* store.
# NOTE(review): the action data is registered here under "action/pos", whereas the
# output store written above names that array "action" — confirm which key
# DaggerDataset expects the normalizer to provide.
norm = LinearNormalizer()
norm_inputs = {
    "robot_state": source['robot_state'][:],
    "action/pos": source['action/pos'][:],
}
norm.fit(norm_inputs)


# Build the dataset on top of the re-chunked output store.
dataset = DaggerDataset(
    dataset_path=out_path,
    normalizer=norm,
    obs_horizon=1,
    action_horizon=8,
    pred_horizon=32,
)

AttributeError: 'DaggerDataset' object has no attribute 'action_key'

In [14]:
# Fetch the first sample once and reuse it; indexing `dataset[0]` separately for
# every field would build the same sample three times.
first_sample = dataset[0]
first_sample["action"].shape, first_sample["color_image1"].shape, first_sample["color_image2"].shape

(torch.Size([32, 10]),
 torch.Size([1, 240, 320, 3]),
 torch.Size([1, 240, 320, 3]))

In [15]:
# Display the first entry of the dataset's `episode_ends`
# (presumably the end index of the first episode — verify against DaggerDataset).
dataset.episode_ends[0]

401