In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
from glob import glob
from pathlib import Path
import os
from tqdm import tqdm
import pandas as pd
import zarr
import numpy as np

from src.models.vision import get_encoder
from src.data.process_demos import encode_demo
from src.visualization.render_mp4 import create_mp4

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Root of the data tree comes from the FURNITURE_DATA_DIR environment variable
# (a KeyError is raised here if it is not set).
base_dir = Path(os.environ["FURNITURE_DATA_DIR"])

# Directory holding the raw simulated rollouts, and the CSV index inside it.
rollout_dir = base_dir.joinpath("raw", "sim_rollouts")
file_path = rollout_dir.joinpath("index.csv")

## Index the raw rollout data

Indexing is now done by the standalone script `src.data.index_rollouts`.

## Augment an existing Zarr array with new data from the index

In [4]:
# NOTE(review): this rebinds `base_dir`, which an earlier cell derived from the
# FURNITURE_DATA_DIR environment variable, to a hard-coded absolute path.
# `file_path` (built earlier) still points at the environment-derived location,
# while the zarr path built below uses this one. Presumably the two locations
# are meant to be the same -- TODO confirm and drop the override if so.
base_dir = Path("/data/scratch/ankile/furniture-data/data")

In [5]:
# Location of the processed zarr store that will be augmented.
zarr_path = base_dir.joinpath(
    "processed",
    "sim",
    "feature_separate_small",
    "vip",
    "one_leg",
    "data_aug.zarr",
)

# Mode "a" opens the store for reading and writing, creating it if it does not exist.
store = zarr.open(str(zarr_path), mode="a")

In [6]:
# Make sure the store has a `rollout_paths` dataset, which records the source
# file of every rollout already written so they can be skipped later.
has_rollout_paths = "rollout_paths" in store
if has_rollout_paths:
    print("rollout_paths dataset already exists")
else:
    print("Creating rollout_paths dataset")
    store.create_dataset("rollout_paths", shape=(0,), dtype=str)

# Drop the `skills` dataset when present (destructive: the data is deleted).
has_skills = "skills" in store
if not has_skills:
    print("skills dataset does not exist")
else:
    print("Removing skills dataset")
    del store["skills"]

rollout_paths dataset already exists
skills dataset does not exist


In [7]:
# Read in the index file as a dataframe and keep only the successful rollouts
index = pd.read_csv(file_path)
index = index[index["success"] == True]

# Paths to all the successful rollouts
paths = index["path"].values

# Drop the paths already recorded in the zarr file. A set gives constant-time
# membership checks; testing `p in <numpy array>` rescans the whole array for
# every candidate path.
zarr_paths = store["rollout_paths"][:]
known_paths = set(zarr_paths)
paths = [p for p in paths if p not in known_paths]

# Number of new rollouts left to add
len(paths)

0

In [8]:
# Sanity check the index by loading the first new rollout and rendering both
# camera streams into a video. Skipped when there are no new rollouts, because
# `paths[0]` would otherwise raise an IndexError.
if len(paths) > 0:
    # NOTE: pickle.load can execute arbitrary code from the file; only load
    # rollouts from a trusted source.
    with open(paths[0], "rb") as f:
        rollout = pickle.load(f)

    vid1 = [o["color_image1"] for o in rollout["observations"]]
    vid2 = [o["color_image2"] for o in rollout["observations"]]
    # Join the two camera streams along axis 2 of the stacked frames
    # (presumably the image width for (T, H, W, C) frames -- TODO confirm).
    vid = np.concatenate([vid1, vid2], axis=2)

    # Keep frames up to and including the first index of the largest reward
    end_idx = np.argmax(rollout["rewards"]) + 1

    create_mp4(vid[:end_idx], "test.mp4")
else:
    print("No new rollouts to sanity check")

100%|██████████| 461/461 [00:00<00:00, 803.99it/s]

File saved as test.mp4





In [9]:
# Batch size handed to `encode_demo` when encoding rollout observations
batch_size = 1024

# Frozen "vip" encoder placed on the first CUDA device
encoder = get_encoder("vip", freeze=True, device="cuda:0")

In [13]:
# Iterate over the new rollout paths and append each one to the zarr file.
# `episode_ends` stores cumulative end offsets, so continue from the last stored
# offset, or from 0 when the store holds no episodes yet.
end_index = store["episode_ends"][-1] if store["episode_ends"].shape[0] > 0 else 0

for path in tqdm(paths):
    # NOTE: pickle.load can execute arbitrary code from the file; only load
    # rollouts from a trusted source.
    with open(path, "rb") as f:
        data = pickle.load(f)

    # Keep timesteps up to and including the first index of the largest reward
    end_idx = np.argmax(data["rewards"]) + 1

    # Encode the observations BEFORE writing anything, so that a failure in the
    # encoder (e.g. running out of GPU memory) cannot leave the arrays in the
    # store with mismatched lengths.
    obs = data["observations"][:end_idx]
    demo_robot_states, demo_features1, demo_features2 = encode_demo(
        encoder, batch_size, obs
    )

    # Cumulative offset at which this episode ends
    end_index = end_index + end_idx

    # Per-timestep arrays
    store["action"].append(data["actions"][:end_idx])
    store["rewards"].append(data["rewards"][:end_idx])
    store["robot_state"].append(demo_robot_states)
    store["feature1"].append(demo_features1)
    store["feature2"].append(demo_features2)

    # Per-episode arrays; the path is recorded last so a rollout only counts as
    # "already in the store" once all of its data has been written.
    store["episode_ends"].append([end_index])
    store["furniture"].append([data["furniture"]])
    store["rollout_paths"].append([path])

  0%|          | 0/451 [00:00<?, ?it/s]

100%|██████████| 451/451 [40:20<00:00,  5.37s/it]


In [11]:
# Show the last cumulative episode end after augmentation. The append loop adds
# one timestep slice to `action` and one running-total entry to `episode_ends`
# per rollout, so this should equal the total number of stored timesteps.
store["episode_ends"][-1]

259640