In [1]:
# Optional one-time setup: uncomment the lines below to install the packages
# this notebook imports (plus the black/nbqa formatting tools).
# NOTE(review): versions are not pinned; prefer `%pip install pkg==x.y.z` so the
# install targets the running kernel's environment.
# !pip3 install numpy
# !pip3 install matplotlib
# !pip3 install scipy
# !pip3 install tensorflow_datasets
# !pip3 install opencv-python
# !pip3 install h5py
# !pip3 install tensorflow
# !pip3 install pandas
# !pip3 install black
# !pip3 install nbqa

In [2]:
import subprocess
import json
import glob

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.transform import Rotation as R
from PIL import Image
from IPython import display
import tensorflow_datasets as tfds
import cv2
import h5py
import tensorflow as tf
import pandas as pd
import os

In [4]:
# Switch tf.data into debug mode and make tf.function-wrapped code run eagerly.
# Presumably a debugging aid so the per-step tensors used by the export cells
# can be inspected directly; NOTE(review): confirm it is still needed, since
# debug/eager execution is typically slower than graph execution.
tf.data.experimental.enable_debug_mode()
tf.config.run_functions_eagerly(True)

In [5]:
# Create a TFDS builder directly from the prepared dataset directory and print
# its feature specification, i.e. the keys available per episode and per step.
builder = tfds.builder_from_directory(builder_dir="../../data/droid_100/1.0.0/")
print(builder.info.features)

FeaturesDict({
    'episode_metadata': FeaturesDict({
        'file_path': string,
        'recording_folderpath': string,
    }),
    'steps': Dataset({
        'action': Tensor(shape=(7,), dtype=float64),
        'action_dict': FeaturesDict({
            'cartesian_position': Tensor(shape=(6,), dtype=float64),
            'cartesian_velocity': Tensor(shape=(6,), dtype=float64),
            'gripper_position': Tensor(shape=(1,), dtype=float64),
            'gripper_velocity': Tensor(shape=(1,), dtype=float64),
            'joint_position': Tensor(shape=(7,), dtype=float64),
            'joint_velocity': Tensor(shape=(7,), dtype=float64),
        }),
        'discount': Scalar(shape=(), dtype=float32),
        'is_first': bool,
        'is_last': bool,
        'is_terminal': bool,
        'language_instruction': string,
        'language_instruction_2': string,
        'language_instruction_3': string,
        'observation': FeaturesDict({
            'cartesian_position': Tensor(shape

In [6]:
def load_dataset(data, step_size=1):
    """Split sampled rows of ``data`` into a leading and a trailing part.

    Every ``step_size``-th row (starting at index 0) is cut into its first
    three entries and the remainder.

    Args:
        data: Indexable, sized collection of sliceable rows.
        step_size: Stride used when sampling rows.

    Returns:
        A ``zip`` iterator that yields two tuples: first the leading parts of
        all sampled rows, then their trailing parts. It yields nothing when
        ``data`` is empty.
    """
    pairs = []
    for idx in range(0, len(data), step_size):
        row = data[idx]
        pairs.append((row[:3], row[3:]))
    # Transpose [(head, tail), ...] into (heads...), (tails...).
    return zip(*pairs)

In [11]:
def get_information(i):
    """Return a fresh metadata template for one exported episode.

    Args:
        i: Episode index. Currently unused; every call returns the same
            placeholder values.

    Returns:
        A new dict of placeholder metadata. The ``cameras`` and ``joints``
        lists are created per call, so callers may modify the result freely.
    """
    camera_names = [
        "exterior_image_1_left",
        "exterior_image_2_left",
        "wrist_image_left",
    ]
    joint_names = ["cartesian_position"]

    # Keys are inserted in the order they should appear in information.json.
    info = {}
    info["dataFolderName"] = "robot_data_example"
    info["startTime"] = "2024-09-21T10:00:00Z"
    info["endTime"] = "2024-09-21T12:00:00Z"
    info["robotEmbodiment"] = "ALOHA"
    info["robotSerialNumber"] = "RS123456"
    info["videoSamplingRate"] = 10
    info["armSamplingRate"] = 50
    info["sensorSamplingRate"] = 60
    info["operatorName"] = "John Doe"
    info["taskDescription"] = "Sample Task"
    info["subtaskDescription"] = "Subtask Description"
    info["taskState"] = "SUCCESS"
    info["subtaskState"] = "SUCCESS"
    info["dataLength"] = 0
    info["durationInSeconds"] = 0
    info["cameras"] = camera_names
    info["joints"] = joint_names
    return info

In [13]:
# Export the first NUM_EPISODES episodes of droid_100 into
# ../public/data/droid_XXXXXXXX/. For every episode this writes:
#   trajectories/<joint>__trajectory.json  - one record per step
#   images/<camera>__image_<idx>.webp      - down-scaled frames
#   images/<camera>__image.json            - list of the frame file names
#   video.mp4                              - preview built from every
#                                            VIDEO_FRAME_STRIDE-th frame
#   information.json                       - get_information() metadata plus
#                                            length and duration

NUM_EPISODES = 6  # the original note suggested range(len(ds)) for a full export
THUMBNAIL_HEIGHT = 100  # target frame height in pixels; width follows the aspect ratio
WEBP_QUALITY = 15
VIDEO_FRAME_STRIDE = 100  # the preview video samples every 100th saved frame
FRAME_LIST_PATH = "frame_list.txt"  # scratch input file for ffmpeg's concat demuxer
# Column names for the 6-D cartesian_position vectors (the only configured joint).
TRAJECTORY_COLUMNS = ["x", "y", "z", "roll", "pitch", "yaw"]

# Load the dataset
ds = tfds.load("droid_100", data_dir="../../data", split="train")

# zip() stops after NUM_EPISODES, or earlier if the split holds fewer episodes.
for episode_idx, episode in zip(range(NUM_EPISODES), ds):
    data_folder = f"../public/data/droid_{episode_idx:08d}"
    os.makedirs(f"{data_folder}/trajectories", exist_ok=True)
    os.makedirs(f"{data_folder}/images", exist_ok=True)

    # Start from the metadata template and fill in the episode-specific fields.
    information = get_information(episode_idx)
    information["dataFolderName"] = f"droid_{episode_idx:08d}"
    information["taskDescription"] = "pick up an item"
    information["subtaskDescription"] = "reach out and pick up an item"

    # Collect the trajectory samples and down-scaled frames step by step.
    # Pre-creating the lists keeps the code below valid for an empty episode.
    trajectories = {joint: [] for joint in information["joints"]}
    images = {camera: [] for camera in information["cameras"]}
    num_steps = 0
    for step in episode["steps"]:
        num_steps += 1
        for joint in information["joints"]:
            trajectories[joint].append(step["action_dict"][joint].numpy())
        for camera in information["cameras"]:
            frame = Image.fromarray(step["observation"][camera].numpy())

            # Scale to THUMBNAIL_HEIGHT while keeping the aspect ratio.
            width, height = frame.size
            aspect_ratio = width / height
            new_width = int(THUMBNAIL_HEIGHT * aspect_ratio)
            images[camera].append(
                frame.resize((new_width, THUMBNAIL_HEIGHT), Image.Resampling.LANCZOS)
            )

    # Save trajectories: one JSON file per joint, one record per step.
    for joint in information["joints"]:
        trajectory_df = pd.DataFrame(trajectories[joint], columns=TRAJECTORY_COLUMNS)
        trajectory_df.to_json(
            f"{data_folder}/trajectories/{joint}__trajectory.json",
            orient="records",
            index=False,
        )

    # Save images as WebP plus a JSON index of the files actually written.
    for camera in information["cameras"]:
        frame_names = []
        for frame_idx, frame in enumerate(images[camera]):
            frame_name = f"{camera}__image_{frame_idx:08d}.webp"
            frame.save(
                f"{data_folder}/images/{frame_name}",
                format="WEBP",
                quality=WEBP_QUALITY,
                optimize=True,
            )
            frame_names.append(frame_name)
        pd.DataFrame({"image": frame_names}).to_json(
            f"{data_folder}/images/{camera}__image.json", orient="records", index=False
        )

    # Build the preview video from the last configured camera.
    output_video = f"{data_folder}/video.mp4"
    video_camera = information["cameras"][-1]

    # Only list frames that exist: the stride is bounded by the number of
    # frames saved for this episode instead of a fixed upper limit.
    video_frame_indices = range(0, len(images[video_camera]), VIDEO_FRAME_STRIDE)
    with open(FRAME_LIST_PATH, "w") as frame_list:
        for frame_idx in video_frame_indices:
            frame_list.write(
                f"file '{data_folder}/images/{video_camera}__image_{frame_idx:08d}.webp'\n"
            )

    if len(video_frame_indices) > 0:
        # Argument list instead of a shell string: no quoting issues with paths.
        ffmpeg_status = subprocess.call(
            [
                "ffmpeg",
                "-y",
                "-f",
                "concat",
                "-safe",
                "0",
                "-i",
                FRAME_LIST_PATH,
                "-c:v",
                "libx264",
                "-pix_fmt",
                "yuv420p",
                output_video,
            ]
        )
        if ffmpeg_status != 0:
            print(f"ffmpeg exited with status {ffmpeg_status} for {output_video}")

    # Write the episode metadata.
    information["dataLength"] = num_steps
    # NOTE(review): stored as a formatted string while the get_information()
    # default is the number 0 - confirm which type the consumer expects.
    information["durationInSeconds"] = (
        f'{num_steps / information["videoSamplingRate"]:.2f}'
    )
    with open(f"{data_folder}/information.json", "w") as json_file:
        json.dump(information, json_file, indent=2)

ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.3.9.4)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --

In [None]:
def _decode_bytes(value):
    """Recursively turn bytes (possibly nested inside lists) into UTF-8 strings."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    if isinstance(value, list):
        return [_decode_bytes(item) for item in value]
    return value


def _plain_value(value):
    """Convert a single value into an object that json.dumps can serialise."""
    if isinstance(value, dict):
        # Nested dicts stay dicts so they are encoded once, by the caller.
        return _plain_dict(value)
    if value is None or isinstance(value, (bool, int, float, str)):
        # Plain Python scalars need no conversion.
        return value
    if isinstance(value, tf.Tensor):
        array = value.numpy()
        if value.dtype == tf.string:
            # Scalar string tensors come back as bytes, non-scalar ones as an
            # array of bytes; decode both to str.
            if isinstance(array, bytes):
                return array.decode("utf-8")
            return _decode_bytes(array.tolist())
        return array.tolist()
    # Anything else is passed through unchanged.
    return value


def _plain_dict(mapping):
    """Convert every entry of ``mapping``; entries that fail are reported and skipped."""
    plain = {}
    for key, value in mapping.items():
        try:
            plain[key] = _plain_value(value)
        except Exception as e:
            # Best effort: keep going so one bad entry does not lose the rest.
            print(f"Error processing key '{key}': {e}")
    return plain


def plain_format_action_dict(step):
    """Serialise a (possibly nested) dict of tensors to a JSON string.

    String tensors are decoded as UTF-8, other tensors become (nested) lists
    or Python scalars, nested dicts are converted recursively, and all other
    values are passed through as they are.

    Nested dicts are emitted as nested JSON objects. Previously each nested
    dict was serialised on its own and then embedded as an escaped JSON
    string inside the outer document.

    Args:
        step: Mapping from keys to tensors, nested mappings or plain values.

    Returns:
        The JSON text for the converted mapping. Keys whose conversion raised
        an exception are omitted and a message is printed for each.
    """
    return json.dumps(_plain_dict(step))


# Serialise every step of the first episode with plain_format_action_dict.
# `ds` must already have been loaded by an earlier cell.
ds_iter = iter(ds)
data_list = []

for i in range(1):  # Adjust as needed
    episode = next(ds_iter)
    # Materialise the steps first, then format them one by one.
    step_data_list = list(episode["steps"])
    new_step_data_list = []
    for step in step_data_list:  # Adjust as needed
        new_step_data_list.append(plain_format_action_dict(step))

In [None]:
# Start with an empty list.
# NOTE(review): nothing in the visible cells appends to this list, and the
# name is not descriptive - confirm whether it is still needed.
data2_list = []

In [None]:
# Show the current contents of data2_list as the cell output.
data2_list