In [1]:
import numpy as np
from tqdm import tqdm
import wandb as wb
import imageio
from io import BytesIO
import pickle
import zarr
from pathlib import Path
import os
import random

from src.codecs.imagecodecs import Jpeg2k, register_codecs

In [2]:
# Root directory for all datasets: taken from $FURNITURE_DATA_DIR when set,
# otherwise the relative "data" directory.
data_base_dir = Path(os.getenv("FURNITURE_DATA_DIR", "data"))

## Inspect the current image data file we're using

In [3]:
# Open the existing dataset read-only (mode="r") so inspection cannot modify it.
datapath = data_base_dir.joinpath("processed/sim/image_small/one_leg/data.zarr")

store = zarr.open(datapath, mode="r")

In [4]:
# Shape and chunk shape of the "color_image1" array, displayed as a tuple.
color_image1 = store["color_image1"]
color_image1.shape, color_image1.chunks

((370234, 224, 224, 3), (1, 224, 224, 3))

In [5]:
# Uncompressed size of "color_image1" in GB (nbytes counts raw element bytes,
# not the compressed on-disk footprint).
image_nbytes = store["color_image1"].nbytes
image_nbytes / 1e9

55.730583552

## Do a one-off conversion of a dataset from chunk size 1 to chunk size 32

In [7]:
# Rebind datapath/store to the data_batch_32 copy. mode="a" opens it for
# reading and writing, which the dataset-creation cells below require.
datapath = data_base_dir.joinpath(
    "processed/sim/image_small/one_leg/data_batch_32.zarr"
)

store = zarr.open(datapath, mode="a")

In [None]:
# NOTE(review): dead draft cell (no execution count). `z` and `chunksize` are
# not defined anywhere in the visible notebook, so this would fail if
# uncommented as-is. The executed cells below create "reward" and "skill" with
# dtype int8 and chunks of 100 instead — consider deleting this cell.
# z.create_dataset(
#     "reward",
#     shape=(0,),
#     dtype=np.float32,
#     chunks=(chunksize,),
# )
# z.create_dataset(
#     "skill",
#     shape=(0,),
#     dtype=np.float32,
#     chunks=(chunksize,),
# )

In [8]:
# Copy "reward_old" into a new int8 "reward" array chunked in blocks of
# `reward_chunk` elements.
#
# The source and destination arrays are looked up once, outside the loop:
# indexing the group (`store[...]`) on every iteration repeats the same
# hierarchy lookup thousands of times for no benefit.
reward_chunk = 100  # single source of truth for chunk size and copy step

reward_src = store["reward_old"]
reward_dst = store.create_dataset(
    "reward",
    shape=reward_src.shape,
    dtype=np.int8,
    chunks=(reward_chunk,),
)

# Copy one destination chunk per step so every write is chunk-aligned; the
# final slice is clipped to the array length by normal slicing rules.
for start in tqdm(range(0, reward_src.shape[0], reward_chunk)):
    stop = start + reward_chunk
    reward_dst[start:stop] = reward_src[start:stop]

  0%|          | 0/3696 [00:00<?, ?it/s]

100%|██████████| 3696/3696 [58:34<00:00,  1.05it/s]


In [9]:
# Copy "skill_old" into a new int8 "skill" array chunked in blocks of
# `skill_chunk` elements.
#
# The source and destination arrays are looked up once, outside the loop:
# indexing the group (`store[...]`) on every iteration repeats the same
# hierarchy lookup thousands of times for no benefit.
skill_chunk = 100  # single source of truth for chunk size and copy step

skill_src = store["skill_old"]
skill_dst = store.create_dataset(
    "skill",
    shape=skill_src.shape,
    dtype=np.int8,
    chunks=(skill_chunk,),
)

# Copy one destination chunk per step so every write is chunk-aligned; the
# final slice is clipped to the array length by normal slicing rules.
for start in tqdm(range(0, skill_src.shape[0], skill_chunk)):
    stop = start + skill_chunk
    skill_dst[start:stop] = skill_src[start:stop]

100%|██████████| 3696/3696 [52:59<00:00,  1.16it/s]
