In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import furniture_bench

import os
import pickle
import numpy as np
from pathlib import Path
import zarr

from src.visualization.render_mp4 import (
    data_to_video,
    unpickle_data,
    create_mp4_jupyter,
    mp4_from_pickle_jupyter,
)
from src.gym import get_env

from tqdm import tqdm

In [None]:
# Default location of the raw furniture data. `setdefault` keeps a DATA_DIR_RAW that
# is already exported in the environment, so the notebook also runs on machines with
# a different data layout.
os.environ.setdefault(
    "DATA_DIR_RAW", "/data/scratch-oc40/pulkitag/ankile/furniture-data/"
)

In [None]:
# Folder <DATA_DIR_RAW>/raw/sim/teleop, searched for demo pickles below.
teleopdir = Path(os.environ["DATA_DIR_RAW"]).joinpath("raw", "sim", "teleop")

In [None]:
# Recursively collect every file under `teleopdir` whose name ends in "success.pkl".
pickles = [*teleopdir.rglob("*success.pkl")]

len(pickles)

In [None]:
# Scratch folder, relative to the working directory, for the rendered videos.
tmp_rollout_dir = Path(".", "tmp_rollout_dir")
tmp_rollout_dir.mkdir(exist_ok=True)

In [None]:
# Render each episode flagged as successful to an mp4, show it inline, and count them.
total = 0

for i, p in enumerate(pickles):
    data = unpickle_data(p)
    if data["success"]:
        ims = data_to_video(data)
        video_tag = create_mp4_jupyter(ims, tmp_rollout_dir / f"video{i}.mp4")
        display(video_tag)
        total += 1

print(f"Total successes: {total}")

## Play back teleop demos in the sim

### Teleop demo

In [None]:
# Scripted `one_leg` episode inspected below. The teleop `one_leg` episode that used
# to be loaded into `data` first was overwritten right away, so that load is dropped.
data = unpickle_data(
    "/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim/scripted/one_leg/low/2024-01-12-16:21:52/2024-01-12-16:21:52_success.pkl"
)

# Lamp episode; its actions and observations are used by the playback cells.
lamp = unpickle_data(
    "/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim/lamp/low/2024-01-18-15:00:10/2024-01-18-15:00:10.pkl"
)

# Keys stored in a single observation.
data["observations"][0].keys()

In [None]:
# Create one `lamp` environment on GPU 0 with low randomness, quaternion action
# rotations and the "diffik" control mode; `get_env` comes from `src.gym`.
env = get_env(
    gpu_id=0,
    # obs_type="image",
    furniture="lamp",
    num_envs=1,
    randomness="low",
    resize_img=True,
    act_rot_repr="quat",
    ctrl_mode="diffik",
    verbose=False,
)

In [None]:
def play_back_trajectory(actions, env, filename):
    """Replay `actions` in `env` and display both camera views side by side as an mp4.

    Args:
        actions: Iterable of actions; each one is passed to `env.step` in order.
        env: Environment whose `reset()` and `step()` observations contain
            `"color_image1"` and `"color_image2"` tensors.
        filename: Name of the mp4 file written inside the notebook-level
            `tmp_rollout_dir`.
    """
    frames1, frames2 = [], []

    def record(obs):
        # Move each frame to host memory right away so device tensors are not kept
        # alive for the whole rollout.
        frames1.append(obs["color_image1"].cpu().numpy())
        frames2.append(obs["color_image2"].cpu().numpy())

    record(env.reset())
    for action in tqdm(actions):
        obs, _, _, _ = env.step(action)
        record(obs)

    imgs1 = np.array(frames1)
    imgs2 = np.array(frames2)

    # Fold any leading axes into one frame axis, keeping the trailing
    # (height, width, channel) axes whatever the image size is.
    imgs1 = imgs1.reshape(-1, *imgs1.shape[-3:])
    imgs2 = imgs2.reshape(-1, *imgs2.shape[-3:])

    # Put the two cameras next to each other along the width axis.
    images = np.concatenate([imgs1, imgs2], axis=2)

    video_tag = create_mp4_jupyter(images, tmp_rollout_dir / filename)
    display(video_tag)

In [None]:
# Replay the actions of the lamp episode in the environment.
play_back_trajectory(
    actions=lamp["actions"],
    env=env,
    filename="scripted_demo_lamp_1.mp4",
)

In [None]:
# Render the observations stored in the lamp episode to an mp4 and show it inline.
# The `one_leg` pickle that used to be loaded into `data1` here was never read.
ims1 = data_to_video(lamp)

video_tag = create_mp4_jupyter(ims1, tmp_rollout_dir / "test_lamp.mp4")
display(video_tag)

### Scripted demo

In [None]:
import zarr

# Open the processed lamp dataset read-only. Without an explicit mode zarr opens the
# store writable and creates it when the path does not exist.
z = zarr.open(
    "/data/scratch/ankile/furniture-data/processed/sim/image/lamp/data_batch_32.zarr",
    mode="r",
)

In [None]:
ep_num = 3

# Episode k is sliced from the end of episode k-1 up to its own end. Episode 0 starts
# at row 0; indexing `episode_ends[-1]` would wrap around to the last episode.
start_idx = z["episode_ends"][ep_num - 1] if ep_num > 0 else 0
end_idx = z["episode_ends"][ep_num]

actions = z["action"][start_idx:end_idx]

actions.shape

In [None]:
# Replay the actions of the selected dataset episode in the environment.
play_back_trajectory(
    actions=actions,
    env=env,
    filename="scripted_demo_1.mp4",
)

In [None]:
import numpy as np
from tqdm import tqdm
import zarr
import random

In [None]:
# Chunk shape passed as `chunks=` when the image arrays are written to new zarr stores.
chunk_size = (1_000, 224, 224, 3)  # Adjust based on your needs

In [None]:
# Read-only handle on the processed `one_leg` image dataset.
tmp = zarr.open(
    store="/data/scratch/ankile/furniture-data/processed/sim/image/one_leg/data_batch_32.zarr",
    mode="r",
)

# Shapes of the two camera arrays.
tuple(tmp[name].shape for name in ("color_image1", "color_image2"))

In [None]:
# Copy camera 1 from the zarr store into a uint8 array in RAM, 1024 frames per read.
np_img1 = np.zeros(shape=tmp["color_image1"].shape, dtype=np.uint8)

for i in tqdm(range(0, len(np_img1), 1024)):
    np_img1[i : i + 1024] = tmp["color_image1"][i : i + 1024]

In [None]:
# Copy camera 2 from the zarr store into a uint8 array in RAM, 1024 frames per read.
np_img2 = np.zeros(shape=tmp["color_image2"].shape, dtype=np.uint8)

for i in tqdm(range(0, len(np_img2), 1024)):
    np_img2[i : i + 1024] = tmp["color_image2"][i : i + 1024]

In [None]:
# Shapes of the two in-memory image arrays.
np_img1.shape, np_img2.shape

In [None]:
# Shapes of the two in-memory image arrays (same check as the previous cell).
np_img1.shape, np_img2.shape

In [None]:
# Chunk layout used for both arrays.
chunk_size = (1_000, 224, 224, 3)  # Adjust based on your needs

# On-disk directory store that holds both camera arrays.
store = zarr.DirectoryStore("stores/directory_store.zarr")

# Camera 1: allocate in the store (replacing any previous copy), then fill from RAM.
color_image1 = zarr.create(
    store=store,
    path="color_image1",
    shape=np_img1.shape,
    dtype=np_img1.dtype,
    chunks=chunk_size,
    overwrite=True,
)
color_image1[:] = np_img1

# Camera 2: same procedure.
color_image2 = zarr.create(
    store=store,
    path="color_image2",
    shape=np_img2.shape,
    dtype=np_img2.dtype,
    chunks=chunk_size,
    overwrite=True,
)
color_image2[:] = np_img2

In [None]:
# Re-open the directory store and put an LRU cache with no size limit in front of it.
store = zarr.DirectoryStore("stores/directory_store.zarr")
cached_store = zarr.LRUStoreCache(store, max_size=None)

# Read-only views of both camera arrays that go through the cache.
color_image1, color_image2 = (
    zarr.open_array(cached_store, path=name, mode="r")
    for name in ("color_image1", "color_image2")
)

In [None]:
# Display the repr of the cached zarr array.
color_image1

In [None]:
# Time 1,000 reads of 16 consecutive frames at random offsets. The offset has to be
# used in the slices; slicing with the loop counter only reads frames 0..1014 in order.
for _step in tqdm(range(1_000)):
    random_idx = random.randint(0, color_image1.shape[0] - 16)
    color_image1[random_idx : random_idx + 16]
    color_image2[random_idx : random_idx + 16]

In [None]:
# Read both arrays front to back in 1024-frame slices so that every chunk passes
# through the cached store once.
# iterate over the arrays to test the cache
for i in tqdm(range(0, color_image1.shape[0], 1024)):
    color_image1[i : i + 1024]
    color_image2[i : i + 1024]

In [None]:
# Repeat the random-read benchmark now that the cache has seen every chunk. The random
# offset has to be used in the slices; the loop counter would make the reads sequential.
for _step in tqdm(range(1_000)):
    random_idx = random.randint(0, color_image1.shape[0] - 16)
    color_image1[random_idx : random_idx + 16]
    color_image2[random_idx : random_idx + 16]

In [None]:
# Write both camera arrays into a single zip-backed zarr store, using the
# `chunk_size` defined in an earlier cell.
zip_store = zarr.ZipStore("stores/zip_store.zip", mode="w")

try:
    # Create Zarr arrays within the ZipStore
    color_image1 = zarr.create(
        shape=np_img1.shape,
        chunks=chunk_size,
        dtype=np_img1.dtype,
        store=zip_store,
        path="color_image1",
    )
    color_image2 = zarr.create(
        shape=np_img2.shape,
        chunks=chunk_size,
        dtype=np_img2.dtype,
        store=zip_store,
        path="color_image2",
    )

    # Populate the arrays
    color_image1[:] = np_img1
    color_image2[:] = np_img2
finally:
    # A ZipStore has to be closed explicitly; do it even when writing fails so the
    # file handle is released.
    zip_store.close()

In [None]:
import zarr
import numpy as np
from concurrent.futures import ThreadPoolExecutor


def load_chunk(arr, chunk_slice):
    """Return `arr[chunk_slice]`; a plain function so the read can be submitted to an executor."""
    return arr[chunk_slice]


def get_chunk_slices(shape, chunks):
    """Return one tuple of slices per chunk of an array with the given chunking.

    Args:
        shape: Shape of the array.
        chunks: Chunk length along each axis (same length as `shape`).

    Returns:
        A list of tuples of `slice` objects, one tuple per chunk, ordered by
        `np.ndindex` over the chunk grid. The last chunk along an axis is clipped to
        the array bounds.
    """
    # Number of chunks along each axis.
    grid = [len(range(0, s, c)) for s, c in zip(shape, chunks)]
    return [
        tuple(
            # np.ndindex yields chunk *indices*; scale by the chunk length to get the
            # element offsets of that chunk.
            slice(k * c, min((k + 1) * c, s))
            for k, c, s in zip(chunk_idx, chunks, shape)
        )
        for chunk_idx in np.ndindex(*grid)
    ]


# Open the ZipStore in read-only mode
zip_store = zarr.ZipStore("stores/zip_store.zip", mode="r")

try:
    # Open the arrays
    color_image1 = zarr.open_array(store=zip_store, path="color_image1", mode="r")
    color_image2 = zarr.open_array(store=zip_store, path="color_image2", mode="r")

    # Get the chunk slices for each array
    chunk_slices1 = get_chunk_slices(color_image1.shape, color_image1.chunks)
    chunk_slices2 = get_chunk_slices(color_image2.shape, color_image2.chunks)

    # Create empty arrays to hold the data in memory
    color_image1_data = np.empty(color_image1.shape, dtype=color_image1.dtype)
    color_image2_data = np.empty(color_image2.shape, dtype=color_image2.dtype)

    # Load data in parallel: submit one read per chunk slice, then copy each result
    # into the matching region of the in-memory array.
    with ThreadPoolExecutor() as executor:
        # Load color_image1
        futures1 = [
            executor.submit(load_chunk, color_image1, cs) for cs in chunk_slices1
        ]
        for cs, future in zip(chunk_slices1, futures1):
            color_image1_data[cs] = future.result()

        # Load color_image2
        futures2 = [
            executor.submit(load_chunk, color_image2, cs) for cs in chunk_slices2
        ]
        for cs, future in zip(chunk_slices2, futures2):
            color_image2_data[cs] = future.result()
finally:
    # Close the store even when a read fails so the zip file handle is released.
    zip_store.close()

In [None]:
# Shapes of the two in-memory image arrays.
np_img1.shape, np_img2.shape

In [None]:
from numcodecs import Blosc


# Blosc with zstd at the highest level shown here (9) and bit-shuffling.
compressor = Blosc(cname="zstd", clevel=9, shuffle=Blosc.BITSHUFFLE)

# Create a directory store or ZipStore
store = zarr.DirectoryStore("stores/directory_store_2.zarr")
# Or for ZipStore: store = zarr.ZipStore('path/to/zip_store.zip')

# Create Zarr arrays with the compressor
color_image1 = zarr.array(
    np_img1,
    chunks=chunk_size,
    compressor=compressor,
    store=store,
    path="color_image1",
)
# Camera 2 has to be written from `np_img2`; writing `np_img1` again would store
# camera 1 under both names.
color_image2 = zarr.array(
    np_img2,
    chunks=chunk_size,
    compressor=compressor,
    store=store,
    path="color_image2",
)

In [None]:
# Re-open the compressed directory store and take read-only views of both arrays.
store = zarr.DirectoryStore("stores/directory_store_2.zarr")

color_image1, color_image2 = (
    zarr.open_array(store=store, path=name, mode="r")
    for name in ("color_image1", "color_image2")
)

In [None]:
# Shapes of the two arrays in the re-opened store.
color_image1.shape, color_image2.shape

In [None]:
# Show an image
import matplotlib.pyplot as plt

# Frame to compare between the re-written store and the source dataset.
img_idx = 190

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

# Left: frame from `color_image2`; right: the same index from `tmp["color_image2"]`.
ax1.imshow(color_image2[img_idx])
ax2.imshow(tmp["color_image2"][img_idx])

In [None]:
# Read both arrays out of the store in full.
tmp1, tmp2 = color_image1[:], color_image2[:]

In [None]:
# Shapes of the fully loaded copies.
tmp1.shape, tmp2.shape

In [None]:
# Check what type slicing the whole zarr array returns.
type(tmp1)

In [None]:
# Time 1,000 reads of 16 consecutive frames at random offsets from the compressed
# store. The random offset has to be used in the slice; slicing with the loop counter
# only reads frames 0..1014 in order.
for _step in tqdm(range(1_000)):
    random_idx = random.randint(0, tmp1.shape[0] - 16)
    color_image2[random_idx : random_idx + 16]

In [None]:
import zarr

# Read-only handle on the processed scripted `one_leg` dataset.
z = zarr.open(
    "/data/scratch/ankile/furniture-data/processed/sim/image/one_leg/scripted.zarr",
    mode="r",
)

In [None]:
# Shapes of the two camera arrays in the dataset.
z["color_image1"].shape, z["color_image2"].shape

In [None]:
# Load and display the full `episode_ends` array.
z["episode_ends"][:]

## Look at MP4 compression rate

In [None]:
# 1,000 black 1280x720 RGB frames with uniform noise in the top-left 320x180 corner.
vid1280 = np.zeros((1_000, 720, 1280, 3), dtype=np.uint8)
# `high` is exclusive, so 256 is needed for the noise to cover the full uint8 range.
vid1280[:, :180, :320, :] = np.random.randint(
    0, 256, size=(1_000, 180, 320, 3), dtype=np.uint8
)
# vid240 = np.random.randint(0, 255, size=(1_000, 240, 240, 3), dtype=np.uint8)

In [None]:
# Store the raw frames in NumPy's own format so the `.npy` file can be read back with
# `np.load`; dumping a pickle stream under that name produced a mislabelled file.
np.save("test_1280.npy", vid1280)

In [None]:
# Raw size in GB (at one byte per value) of 1,000 RGB frames. The active line is the
# 240x240 case; the commented-out line is the 1280x720 case.
# 1000 * 720 * 1280 * 3 / 1e9
1000 * 240 * 240 * 3 / 1e9

In [None]:
# Write it as an mp4 so the file size can be compared with the raw array on disk.
create_mp4_jupyter(vid1280, "test_1280.mp4")

In [None]:
import gzip
import numpy
import bz2
import lzma
import pickle

from src.visualization.render_mp4 import unpickle_data

In [None]:
# Episode used as the payload for the pickle compression timings below.
data1 = unpickle_data(
    "/data/scratch/ankile/furniture-data/raw-old/sim/scripted/image/one_leg/low/2023-10-25-17:49:38/2023-10-25-17:49:38.pkl"
)

In [None]:
%%timeit
# Time writing `data1` to disk as an uncompressed pickle.
with open("no_compression.pickle", "wb") as f:
    pickle.dump(data1, f)


In [None]:
%%timeit
# Time reading the uncompressed pickle back.
# NOTE(review): names bound inside a %%timeit cell are not kept in the notebook
# namespace, so this should not replace the global `data1` — confirm.
with open("no_compression.pickle", "rb") as f:
    data1 = pickle.load(f)

In [None]:
%%timeit
# Time writing `data1` as a gzip-compressed pickle.
with gzip.open("gzip_test.gz", "wb") as f:
    pickle.dump(data1, f)


In [None]:
%%timeit
# Time reading the gzip-compressed pickle back.
# NOTE(review): `data2` is bound inside the %%timeit cell and is presumably not
# available to later cells — confirm.
with gzip.open("gzip_test.gz", "rb") as f:
    data2 = pickle.load(f)


In [None]:
%%timeit
# Time writing `data1` as a bz2-compressed pickle.
with bz2.BZ2File("bz2_test.pbz2", "wb") as f:
    pickle.dump(data1, f)


In [None]:
%%timeit
# Time reading the bz2-compressed pickle back.
# NOTE(review): `data3` is bound inside the %%timeit cell and is presumably not
# available to later cells — confirm.
with bz2.BZ2File("bz2_test.pbz2", "rb") as f:
    data3 = pickle.load(f)


In [None]:
%%timeit
# Time writing `data1` as an lzma-compressed pickle.
with lzma.open("lzma_test.xz", "wb") as f:
    pickle.dump(data1, f)

In [None]:
%%timeit
# Time reading the lzma-compressed pickle back.
# NOTE(review): `data4` is bound inside the %%timeit cell and is presumably not
# available to later cells — confirm.
with lzma.open("lzma_test.xz", "rb") as f:
    data4 = pickle.load(f)


In [None]:
# Top-level keys of the loaded episode.
data1.keys()  # , data2.keys(), data3.keys(), data4.keys()

In [None]:
# Keys of the first observation and its stored `image_size` entry.
data1["observations"][0].keys(), data1["observations"][0]["image_size"]

In [None]:
# Load each compressed copy here: the read cells above run under %%timeit, which does
# not export the names they assign, so `data2`/`data3`/`data4` would otherwise be
# undefined in this cell.
with gzip.open("gzip_test.gz", "rb") as f:
    data2 = pickle.load(f)
with bz2.BZ2File("bz2_test.pbz2", "rb") as f:
    data3 = pickle.load(f)
with lzma.open("lzma_test.xz", "rb") as f:
    data4 = pickle.load(f)

# Check the first frame of the video: one flag per codec (gzip, bz2, lzma) saying
# whether it matches the original exactly.
tuple(
    np.all(
        data1["observations"][0]["color_image1"]
        == roundtrip["observations"][0]["color_image1"]
    )
    for roundtrip in (data2, data3, data4)
)

## Inspect the teleop demos that are continued from one_leg scripted

In [None]:
import zarr
import numpy as np
from pathlib import Path
from src.visualization.render_mp4 import (
    mp4_from_pickle_jupyter,
    unpickle_data,
    pickle_data,
)
from furniture_bench.robot.robot_state import filter_and_concat_robot_state
from tqdm import tqdm

# Scratch folder for rendered videos. Create it here as well so this section can be
# run without the first part of the notebook.
tmp_rollout_dir = Path(".") / "tmp_rollout_dir"
tmp_rollout_dir.mkdir(exist_ok=True)

In [None]:
# Every pickle inside a `success` folder of the square_table teleop data.
demos = [
    *Path(
        "/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim/square_table/teleop"
    ).rglob("**/success/*.pkl")
]

len(demos)

In [None]:
# Look at them: render every demo pickle to `<stem>.mp4` in the scratch folder at 20 fps.
for d in demos:
    mp4_from_pickle_jupyter(d, tmp_rollout_dir / f"{d.stem}.mp4", fps=20)

In [None]:
# Print each demo path with its index so single demos can be picked by number.
for i, d in enumerate(demos):
    print(i, d)

In [None]:
# Load one specific demo (index 48 in the listing) for inspection.
data = unpickle_data(demos[48])

In [None]:
# Run the furniture_bench helper on the first observation's robot state.
filter_and_concat_robot_state(data["observations"][0]["robot_state"])

In [None]:
# Robot-state entries that are collected, in this order.
ROBOT_STATES = ["ee_pos", "ee_quat", "ee_pos_vel", "ee_ori_vel", "gripper_width"]

# Robot state and part poses of the first observation.
robot_state, parts_poses = (
    data["observations"][0][key] for key in ("robot_state", "parts_poses")
)

In [None]:
# Display what `.item()` returns for the stored robot state.
robot_state.item()

In [None]:
# Collect the ROBOT_STATES entries in order.
current_robot_state = []
for rs in ROBOT_STATES:
    # A gripper width with shape () is wrapped into a 1-element array; note that
    # this also overwrites the entry in `robot_state` itself.
    if rs == "gripper_width" and robot_state[rs].shape == ():
        robot_state[rs] = np.array([robot_state[rs]])
    current_robot_state.append(robot_state[rs])

In [None]:
# One-off piece of code to fix the robot state: unwrap robot states that were stored
# as a NumPy array into the object they contain, and leave dict states untouched.
# WARNING: this overwrites the demo pickles in place.
for demo in tqdm(demos):
    data = unpickle_data(demo)
    for o in data["observations"]:
        if isinstance(o["robot_state"], dict):
            continue
        if isinstance(o["robot_state"], np.ndarray):
            o["robot_state"] = o["robot_state"].item()
        else:
            # Only genuinely unexpected types are an error. Previously the raise ran
            # after every successful ndarray conversion too.
            raise ValueError("Unknown type of robot state")

    # Save it back
    pickle_data(data, demo)

In [None]:
# One-off piece of code to fix the rewards
import torch


# Normalise every stored reward to a Python float and write the demo back in place.
for demo in tqdm(demos):
    data = unpickle_data(demo)

    for i, reward in enumerate(data["rewards"]):
        # Already a float: nothing to do.
        if isinstance(reward, float):
            continue

        if isinstance(reward, torch.Tensor):
            data["rewards"][i] = reward.item()
        elif isinstance(reward, int):
            data["rewards"][i] = float(reward)
        else:
            print("in else")
            print(reward)
            raise ValueError("Unknown type of reward:" + str(type(reward)))

    # Save it back
    pickle_data(data, demo)

In [None]:
# Number of rewards in the most recently loaded demo.
len(data["rewards"])

In [None]:
# Look at why there's a difference in the length of the arrays
# Start from the first demo and list the keys it contains.
data = unpickle_data(demos[0])

data.keys()

In [None]:
# Number of observations, rewards and actions in the demo, side by side.
len(data["observations"]), len(data["rewards"]), len(data["actions"])

## Look at my new `one_leg` teleop demos

In [None]:
# Read-only handle on the processed `one_leg` teleop success dataset.
z = zarr.open(
    "/data/scratch/ankile/furniture-data/processed/sim/one_leg/teleop/low/success.zarr",
    mode="r",
)

In [None]:
# Print the shape of `episode_ends`, then display its values.
print(z["episode_ends"].shape)

z["episode_ends"][:]

In [None]:
ep_idx = 10

# Frames of episode `ep_idx` only: from the end of the previous episode (row 0 for
# the first episode) up to this episode's end. Slicing `[:end_idx]` would also
# include every earlier episode.
start_idx = z["episode_ends"][ep_idx - 1] if ep_idx > 0 else 0
end_idx = z["episode_ends"][ep_idx]

img1 = z["color_image1"][start_idx:end_idx]
img2 = z["color_image2"][start_idx:end_idx]
# Put the two cameras side by side along the width axis.
img = np.concatenate([img1, img2], axis=2)

print(img.shape)

create_mp4_jupyter(img, "test.mp4", fps=30)

## Produce examples of each of the tasks

In [None]:
from src.visualization.render_mp4 import mp4_from_pickle_jupyter, unpickle_data
from src.common.files import get_raw_paths
from pathlib import Path

In [None]:
# One example pickle per task. NOTE: `lamp` is rebound here from the episode loaded
# earlier in the notebook to a path string.
lamp = "/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim/lamp/rollout/low/success/2024-02-10T16:04:24.pkl"
rtable = "/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim/round_table/teleop/low/success/2024-02-09T19:05:14.pkl"
stable = "/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim/square_table/teleop/low/success/2024-01-24T11:43:40.pkl"
# chair = "/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim/chair/teleop/med/success/2024-02-10T12:32:45.pkl"

# Files that are rendered in the next cell.
files = [
    lamp,
    rtable,
    # chair,
    stable,
]

In [None]:
# For each example file, print the keys of the pickle and render it to `<stem>.mp4`
# in the working directory at 20 fps.
for f in map(Path, files):
    print(unpickle_data(f).keys())
    mp4_from_pickle_jupyter(f, f"{f.stem}.mp4", fps=20)

## Square table demos 

In [None]:
import furniture_bench

import os
import pickle
import numpy as np
from pathlib import Path
import zarr

from src.visualization.render_mp4 import (
    data_to_video,
    unpickle_data,
    create_mp4_jupyter,
    mp4_from_pickle_jupyter,
)
from src.gym import get_env
from src.common.files import get_raw_paths

from tqdm import tqdm

In [None]:
# Raw demo paths for square_table, low randomness, teleop source (rebinds `pickles`).
pickles = get_raw_paths(
    task="square_table",
    randomness="low",
    demo_source="teleop",
)

In [None]:
# Render every returned demo to `<stem>.mp4` in the working directory at 20 fps.
for p in pickles:
    mp4_from_pickle_jupyter(p, f"{p.stem}.mp4", fps=20)

## Look at a couple of `one_leg` demos

In [None]:
from src.common.files import get_raw_paths

In [None]:
# Raw demo paths for successful one_leg teleop demos in sim, low randomness
# (rebinds `pickles`).
pickles = get_raw_paths(
    task="one_leg",
    randomness="low",
    demo_source="teleop",
    demo_outcome="success",
    environment="sim",
)