In [None]:
import zarr
from src.common.files import get_processed_paths
from src.dataset.zarr import combine_zarr_datasets

In [None]:
import os
import zipfile
import zarr

# Build one combined, state-only Zarr store per (task, randomness) pair and
# write a zip archive of it next to the store.
tasks = ["one_leg", "lamp", "round_table"]
randomness_levels = ["low", "med"]
base_dir = "/data/scratch/ankile/furniture-data/processed/diffusion-adapt"

for task in tasks:
    for randomness in randomness_levels:
        # Source datasets: the plain randomness level and its "_perturb" variant.
        paths = get_processed_paths(
            controller="diffik",
            demo_source="teleop",
            domain="sim",
            task=task,
            demo_outcome="success",
            randomness=[randomness, f"{randomness}_perturb"],
        )

        # The source stores are not opened here: nothing in this cell reads
        # them directly, and zarr.open() defaults to mode "a", which would
        # open them read/write (and create them if missing).
        # `metadata` is returned by the helper but not used in this cell.
        combined_data, metadata = combine_zarr_datasets(
            paths,
            [
                "parts_poses",
                "robot_state",
                "action/pos",
            ],
            max_episodes=None,
            max_ep_cnt=None,
        )

        # Write every combined array into a fresh store (mode "w" overwrites).
        output_path = os.path.join(base_dir, f"{task}_{randomness}.zarr")
        z_combined = zarr.open(output_path, mode="w")
        for key, value in combined_data.items():
            z_combined.array(key, value)

        # Zip the Zarr directory. Archive member names are relative to the
        # store root, so the zip contains the store's contents directly.
        zip_path = f"{output_path}.zip"
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            for root, _, files in os.walk(output_path):
                for filename in files:
                    file_path = os.path.join(root, filename)
                    zf.write(file_path, arcname=os.path.relpath(file_path, output_path))


In [None]:
import os
import zipfile
import zarr

# Build a combined Zarr store (images + state + actions) per (task, randomness)
# pair and carry over the dataset metadata with updated episode/timestep totals.
tasks = ["one_leg"]  # , "lamp", "round_table"]
randomness_levels = ["low"]  #, "med"]
base_dir = "/data/scratch/ankile/furniture-data/processed/diffik/sim/one_leg/teleop"

for task in tasks:
    for randomness in randomness_levels:
        # Source datasets: the plain randomness level and its "_perturb" variant.
        paths = get_processed_paths(
            controller="diffik",
            demo_source="teleop",
            domain="sim",
            task=task,
            demo_outcome="success",
            randomness=[randomness, f"{randomness}_perturb"],
        )

        # Open the sources read-only: only their attributes are read here, and
        # zarr.open() otherwise defaults to mode "a" (read/write, create if
        # missing), which could modify or create stores by accident.
        sources = [zarr.open(path, mode="r") for path in paths]

        # `metadata` is returned by the helper but not used in this cell.
        combined_data, metadata = combine_zarr_datasets(
            paths,
            [
                "color_image1",
                "color_image2",
                "parts_poses",
                "robot_state",
                "action/pos",
                "action/delta",
            ],
            max_episodes=None,
            max_ep_cnt=None,
        )

        # Write every combined array into a fresh store (mode "w" overwrites).
        output_path = os.path.join(base_dir, f"{task}_{randomness}.zarr")
        z_combined = zarr.open(output_path, mode="w")
        for key, value in combined_data.items():
            z_combined.array(key, value)

        # Update the metadata
        # Original metadata for the low dataset
        # {
        #     "calculated_pos_action_from_delta": true,
        #     "chunksize": 1000,
        #     "controller": "diffik",
        #     "demo_outcome": "success",
        #     "demo_source": "teleop",
        #     "domain": "sim",
        #     "furniture": "one_leg",
        #     "mean_episode_length": 457,
        #     "n_episodes": 25,
        #     "n_timesteps": 11433,
        #     "noop_threshold": 0.0,
        #     "random_seed": 0,
        #     "randomize_order": false,
        #     "randomness": "low",
        #     "rotation_mode": "rot_6d",
        #     "suffix": null,
        #     "time_created": "2024-05-28T19:25:29.132477-04:00",
        #     "time_finished": "2024-05-28T19:25:40.612125-04:00"
        # }

        # Set the defaults from the first source dataset.
        # NOTE(review): fields such as "randomness" and the timestamps still
        # describe only the first source after this copy - confirm that is
        # acceptable for downstream consumers.
        combined_attrs = dict(sources[0].attrs)

        # Totals are summed over every source that was combined, not just the
        # first two, so they stay correct if more paths are returned.
        n_episodes = sum(src.attrs["n_episodes"] for src in sources)
        n_timesteps = sum(src.attrs["n_timesteps"] for src in sources)
        combined_attrs["n_episodes"] = n_episodes
        combined_attrs["n_timesteps"] = n_timesteps
        # The source metadata stores the mean as an integer (457 for
        # 11433 / 25), so round instead of writing a float.
        # NOTE(review): the original writer's rounding mode is not visible
        # here - confirm round() vs. truncation.
        combined_attrs["mean_episode_length"] = round(n_timesteps / n_episodes)

        # Single write of the final attributes to the combined store.
        z_combined.attrs.update(combined_attrs)


        