In [53]:
import numpy as np
import tqdm
import torch
from torch.utils.data import DataLoader
import tensorflow as tf

from robomimic.utils.rlds_utils import droid_dataset_transform, robomimic_transform, TorchRLDSDataset, robomimic_dg_transform, dg_dataset_transform

from octo.data.dataset import make_dataset_from_rlds, make_interleaved_dataset, make_single_dataset
from octo.data.utils.data_utils import combine_dataset_statistics
from octo.utils.spec import ModuleSpec

# Give TensorFlow an empty list of visible GPU devices, so TensorFlow in this kernel does not use any GPU
# (presumably to leave GPU memory to PyTorch — TODO confirm).
tf.config.set_visible_devices([], "GPU")
from octo.utils.spec import ModuleSpec
import importlib

In [52]:
# Re-execute robomimic.utils.rlds_utils so that edits made to the file on disk are picked up
# without restarting the kernel.
import importlib

import robomimic
import robomimic.utils.rlds_utils

rlds_utils = importlib.reload(robomimic.utils.rlds_utils)

# importlib.reload() only updates the module object. Names that were bound earlier with
# `from robomimic.utils.rlds_utils import ...` keep pointing at the pre-reload objects,
# so rebind them here; otherwise the transforms used by later cells would stay stale.
from robomimic.utils.rlds_utils import (
    droid_dataset_transform,
    robomimic_transform,
    TorchRLDSDataset,
    robomimic_dg_transform,
    dg_dataset_transform,
)

# Keep the reloaded module as the cell's last expression so its repr is still displayed.
rlds_utils

<module 'robomimic.utils.rlds_utils' from 'c:\\workspace\\droid_policy_learning\\robomimic\\utils\\rlds_utils.py'>

In [60]:
# Dataset / logging configuration for the RLDS loading experiment in this notebook.
DATA_PATH = "C:/Users/willi/tensorflow_datasets"    # UPDATE WITH PATH TO RLDS DATASETS
DATASET_NAME = "deligrasp_dataset"
EXP_LOG_PATH = "C:/workspace/deligrasp_policy_learning/logs" # UPDATE WITH PATH TO DESIRED LOGGING DIRECTORY
sample_weights = [1]

# Length of the action vector; both per-dimension masks below must have exactly this many entries.
ACTION_DIM = 11

# import tensorflow_datasets as tfds
# builder = tfds.builder_from_directory(f"{DATA_PATH}/1.0.0")
# builder.info.features

# Keyword arguments shared by make_dataset_from_rlds and make_single_dataset.
BASE_DATASET_KWARGS = {
    "name": DATASET_NAME,
    "data_dir": DATA_PATH,
    "image_obs_keys": {"primary": "image", "secondary": "wrist_image"},
    "state_obs_keys": ["cartesian_position", "gripper_position", "applied_force", "contact_force"],
    # "state_obs_keys": ["state"], # this makes ["observation"]['proprio'].shape len 16
    "language_key": "language_instruction",
    "norm_skip_keys":  ["proprio"],
    "action_proprio_normalization_type": "bounds",
    "absolute_action_mask": [False] * ACTION_DIM,         # all False: no action dimension is flagged as absolute
    "action_normalization_mask": [False] * ACTION_DIM,    # all False: no action dimension is selected for normalization
    "standardize_fn": dg_dataset_transform,
}


# make_dataset_from_rlds returns a (dataset, dataset_statistics) pair; only the statistics
# (index 1, as in the commented-out combine call below) are kept under the name `stats`.
stats = make_dataset_from_rlds(**BASE_DATASET_KWARGS, train=True)[1]
# combined_dataset_statistics = combine_dataset_statistics(
#     [make_dataset_from_rlds(**dataset_kwargs, train=True)[1] for dataset_kwargs in dataset_kwargs_list]
# )


In [61]:
# Build the training dataset from BASE_DATASET_KWARGS.
# With window_size=2 and future_action_window_size=15 the recorded element_spec shows
# observations with a window axis of 2 and action chunks of length 17.
dataset = make_single_dataset(
    BASE_DATASET_KWARGS,
    train=True,
    traj_transform_kwargs={
        "window_size": 2,
        "future_action_window_size": 15,
        "subsample_length": 50,
        "skip_unlabeled": True,  # skip all trajectories without language annotation
    },
    frame_transform_kwargs={
        "image_augment_kwargs": {},  # no augmentation options are passed
        "resize_size": {
            "primary": [128, 128],
            "secondary": [128, 128],
        },
        "num_parallel_calls": 200,
    },
)

In [21]:
# Display the dataset repr (its element_spec) to check the keys, shapes and dtypes of one element.
dataset

<DLataset element_spec={'observation': {'image_primary': TensorSpec(shape=(None, 2, 128, 128, None), dtype=tf.uint8, name=None), 'image_secondary': TensorSpec(shape=(None, 2, 128, 128, None), dtype=tf.uint8, name=None), 'proprio': TensorSpec(shape=(None, 2, 9), dtype=tf.float32, name=None), 'timestep': TensorSpec(shape=(None, 2), dtype=tf.int32, name=None), 'pad_mask_dict': {'image_primary': TensorSpec(shape=(None, 2), dtype=tf.bool, name=None), 'image_secondary': TensorSpec(shape=(None, 2), dtype=tf.bool, name=None), 'proprio': TensorSpec(shape=(None, 2), dtype=tf.bool, name=None), 'timestep': TensorSpec(shape=(None, 2), dtype=tf.bool, name=None)}, 'pad_mask': TensorSpec(shape=(None, 2), dtype=tf.bool, name=None)}, 'task': {'language_instruction': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'pad_mask_dict': {'language_instruction': TensorSpec(shape=(None,), dtype=tf.bool, name=None)}}, 'action': TensorSpec(shape=(None, 17, 11), dtype=tf.float32, name=None), 'dataset_name': 

In [55]:
from typing import Dict, Any
def dg_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one dataset element into a dict with "obs" and "actions" entries.

    Args:
        trajectory: element with "observation" (containing "image_primary",
            "image_secondary", "proprio" and "pad_mask"), "task" (containing
            "language_instruction") and "action" entries.

    Returns:
        A dict with:
          * "obs": both camera images cast to float32 and divided by 255, the raw
            language instruction, the proprio vector split along its last axis into
            cartesian position (first 6 values), gripper position, applied force and
            contact force (last 3 values, one each), and the pad mask with an extra
            trailing axis.
          * "actions": the action chunks with the first entry of each chunk removed.
    """
    # Debug output. The recorded output shows symbolic tensors, i.e. these prints run
    # when `dataset.map` traces the function rather than once per element.
    print(trajectory.keys())
    print(trajectory["observation"].keys())
    print(trajectory["observation"]['proprio'].shape)
    print(trajectory["task"].keys())
    print(trajectory["action"].shape)
    print(trajectory["action"])

    observation = trajectory["observation"]
    proprio = observation["proprio"]
    return {
        "obs": {
            "camera/image/varied_camera_1_left_image":
                tf.cast(observation["image_primary"], tf.float32) / 255.,
            "camera/image/varied_camera_2_left_image":
                tf.cast(observation["image_secondary"], tf.float32) / 255.,
            "raw_language": trajectory["task"]["language_instruction"],
            "robot_state/cartesian_position": proprio[..., :6],
            "robot_state/gripper_position": proprio[..., -3:-2],
            "robot_state/applied_force": proprio[..., -2:-1],
            "robot_state/contact_force": proprio[..., -1:],
            "pad_mask": observation["pad_mask"][..., None],
        },
        # Drop the first entry of every action chunk (second-to-last axis). Indexing from
        # the right leaves any leading trajectory axis untouched, so actions keep the same
        # number of timesteps as the observations. A plain `[1:]` would instead remove a
        # whole timestep from the (None, 17, 11) input traced here. For an un-batched
        # (chunk, action_dim) input both forms give the same result.
        "actions": trajectory["action"][..., 1:, :],
    }

# Apply dg_transform to the dataset; mapping traces the function, which produces the debug
# prints shown below. The result `test` is not referenced by any other cell visible here.
test = dataset.map(dg_transform, num_parallel_calls=48)


dict_keys(['observation', 'task', 'action', 'dataset_name'])
dict_keys(['image_primary', 'image_secondary', 'proprio', 'timestep', 'pad_mask_dict', 'pad_mask'])
(None, 2, 9)
dict_keys(['language_instruction', 'pad_mask_dict'])
(None, 17, 11)
Tensor("args_0:0", shape=(None, 17, 11), dtype=float32)


In [56]:
# Replace `dataset` with its mapping through robomimic_dg_transform (imported from robomimic.utils.rlds_utils).
# NOTE(review): this rebinds `dataset` to a transformed version of itself, so running the cell a
# second time would feed already-transformed elements into the transform; re-create `dataset` before re-running.
dataset = dataset.map(robomimic_dg_transform, num_parallel_calls=48)


In [47]:
class TestDataset(torch.utils.data.IterableDataset):
    """Thin wrapper around RLDS dataset for use with PyTorch dataloaders.

    Args:
        rlds_dataset: object providing ``as_numpy_iterator()`` and a
            ``dataset_statistics`` attribute; it may also provide ``sample_weights``.
        train: selects the fraction of the total transition count reported by
            ``len()`` (95% when True, 5% when False).
    """

    def __init__(
        self,
        rlds_dataset,
        train=True,
    ):
        self._rlds_dataset = rlds_dataset
        self._is_train = train

    def __iter__(self):
        """Yield each element of the wrapped dataset, with ``raw_language`` converted to a list."""
        for sample in self._rlds_dataset.as_numpy_iterator():
            # Convert the numpy array to a plain Python list (presumably so the DataLoader's
            # collation does not have to deal with a string/bytes ndarray — TODO confirm).
            sample['obs']['raw_language'] = sample['obs']['raw_language'].tolist()
            yield sample

    def __len__(self):
        """Return the train/validation share of the (optionally weighted) transition count."""
        dataset_statistics = self._rlds_dataset.dataset_statistics
        # Accept a single statistics mapping as well as a sequence of them: iterating over
        # a bare mapping would yield its keys instead of per-dataset statistics.
        if isinstance(dataset_statistics, dict) and "num_transitions" in dataset_statistics:
            dataset_statistics = [dataset_statistics]
        lengths = np.array(
            [entry["num_transitions"] for entry in dataset_statistics]
        )
        if hasattr(self._rlds_dataset, "sample_weights"):
            # Out-of-place multiply: an in-place `*=` on an integer array fails with a
            # casting error as soon as the weights are floats.
            lengths = lengths * np.array(self._rlds_dataset.sample_weights)
        total_len = lengths.sum()
        if self._is_train:
            return int(0.95 * total_len)
        else:
            return int(0.05 * total_len)


In [57]:
# Wrap the mapped RLDS dataset in the local TestDataset and expose it through a PyTorch DataLoader.
# pytorch_dataset = robomimic.utils.rlds_utils.TorchRLDSDataset(dataset)
pytorch_dataset = TestDataset(dataset)

# num_workers: important to keep this to 0 so PyTorch does not mess with the parallelism
train_loader = DataLoader(dataset=pytorch_dataset, batch_size=1, num_workers=0)

# for i, sample in tqdm.tqdm(enumerate(train_loader)):
#     if i == 5000:
#         break

In [58]:
# find trajectory length for each trajectory in dataset
# traj_lengths = []
# for traj in dataset:
#     print(traj['obs']['camera/image/varied_camera_1_left_image'].shape)

# Pull elements from the wrapped dataset and stop after the second one, keeping the last
# element seen in `s` and the number of elements consumed in `i`.
i = 0
s = None
for i, sample in enumerate(pytorch_dataset, start=1):
    # print(sample)
    s = sample
    if i > 1:
        break

# Observation keys and the action array of the kept element; later cells reuse both names.
ok = s['obs'].keys()
ak = s['actions']

# Report the Python type stored under each observation key.
for k in ok:
    print(f"type of {k}: {type(s['obs'][k])}")

type of camera/image/varied_camera_1_left_image: <class 'numpy.ndarray'>
type of camera/image/varied_camera_2_left_image: <class 'numpy.ndarray'>
type of raw_language: <class 'list'>
type of robot_state/cartesian_position: <class 'numpy.ndarray'>
type of robot_state/gripper_position: <class 'numpy.ndarray'>
type of robot_state/applied_force: <class 'numpy.ndarray'>
type of robot_state/contact_force: <class 'numpy.ndarray'>
type of pad_mask: <class 'numpy.ndarray'>


In [59]:
import numpy as np
# Print the array shape of every observation entry of the sampled element `s`.
for k in ok:
    # np.array() also turns the raw_language list into an array so that it has a .shape
    kk = np.array(s['obs'][k])
    print(f"shape of {k}: {kk.shape}")

shape of camera/image/varied_camera_1_left_image: (50, 2, 128, 128, 3)
shape of camera/image/varied_camera_2_left_image: (50, 2, 128, 128, 3)
shape of raw_language: (50,)
shape of robot_state/cartesian_position: (50, 2, 6)
shape of robot_state/gripper_position: (50, 2, 1)
shape of robot_state/applied_force: (50, 2, 1)
shape of robot_state/contact_force: (50, 2, 1)
shape of pad_mask: (50, 2, 1)


In [51]:
# Shape of the sampled element's action array.
# NOTE(review): the recorded leading dimension is 49 while the observation arrays printed above
# have a leading dimension of 50 — check whether the transform slices actions along the wrong axis.
ak.shape

(49, 17, 11)

In [None]:
from robomimic.scripts.config_gen.helper import *
import random
import json
import numpy as np
from collections import OrderedDict

# NOTE(review): EXP_LOG_PATH is assigned this same value in other cells of this notebook;
# keep the copies in sync or define it in one place.
EXP_LOG_PATH = "C:/workspace/deligrasp_policy_learning/logs" # UPDATE WITH PATH TO DESIRED LOGGING DIRECTORY


In [None]:
from robomimic.scripts.config_gen.helper import *
import random
import json
import numpy as np
from collections import OrderedDict

#############################################################################
# *************** Replace with your paths/config information ****************

# Note: Assumes naming of dataset in "datasets" for the full DROID dataset is
# droid

# Root directory of the RLDS dataset(s) used for training.
# UPDATE WITH PATH TO RLDS DATASETS
DATA_PATH = "C:/Users/willi/tensorflow_datasets/deligrasp_dataset_scaled"
# Directory that receives experiment logs.
# UPDATE WITH PATH TO DESIRED LOGGING DIRECTORY
EXP_LOG_PATH = "C:/workspace/deligrasp_policy_learning/logs"

# Experiment name -> datasets to train on and their sampling weights.
# Note: you can add co-training dataset here appending
# a new dataset to "datasets" and adjusting "sample_weights"
# accordingly
EXP_NAMES = OrderedDict()
EXP_NAMES["droid"] = {
    "datasets": ["droid"],
    "sample_weights": [1],
}

#############################################################################

def make_generator_helper(args):
    """Build the diffusion-policy config generator for the environment selected by ``args.env``.

    Args:
        args: parsed command-line namespace. This function reads ``args.env`` and
            ``args.mod``, reads ``args.ckpt_mode`` and sets it to "off" when it is None,
            and forwards ``args`` to ``get_generator``.

    Returns:
        The generator returned by ``get_generator`` after all parameters have been
        registered on it with ``add_param``.

    Raises:
        ValueError: if ``args.env`` is neither "deligrasp" nor "droid".
    """
    algo_name_short = "diffusion_policy"

    generator = get_generator(
        algo_name="diffusion_policy",
        config_file=os.path.join(base_path, 'robomimic/exps/templates/diffusion_policy_test.json'),
        args=args,
        exp_log_path=EXP_LOG_PATH,
        algo_name_short=algo_name_short,
        pt=True,
    )
    # Default the checkpoint mode to "off" when it was not given.
    if args.ckpt_mode is None:
        args.ckpt_mode = "off"

    # ---- settings registered for every environment: data format, training length, loading ----
    generator.add_param(
        key="train.data_format",
        name="",
        group=-1,
        values=[
            "droid_rlds"
        ],
    )

    generator.add_param(
        key="train.num_epochs",
        name="",
        group=-1,
        values=[100000],
    )

    generator.add_param(
        key="train.data_path",
        name="",
        group=-1,
        values=[DATA_PATH],
    )

    generator.add_param(
        key="train.shuffle_buffer_size",
        name="",
        group=-1,
        values=[500000],
    )

    generator.add_param(
        key="train.batch_size",
        name="bz",
        group=1212111,
        values=[128],
        hidename=False,
    )

    generator.add_param(
        key="train.subsample_length",
        name="subsample_length",
        group=7070707,
        values=[
            100
        ],
        hidename=True,
    )

    generator.add_param(
        key="train.num_parallel_calls",
        name="num_parallel_calls",
        group=404040404,
        values=[
            200
        ],
        hidename=True,
    )

    generator.add_param(
        key="train.traj_transform_threads",
        name="traj_transform_threads",
        group=303030303,
        values=[
            48
        ],
        hidename=True,
    )

    generator.add_param(
        key="train.traj_read_threads",
        name="traj_read_threads",
        group=908090809,
        values=[
            48
        ],
        hidename=True,
    )

    generator.add_param(
        key="algo.noise_samples",
        name="noise_samples",
        group=1010101,
        values=[8],
        value_names=["8"]
    )

    # use ddim by default
    generator.add_param(
        key="algo.ddim.enabled",
        name="ddim",
        group=1001,
        values=[
            True,
            # False,
        ],
        hidename=True,
    )
    generator.add_param(
        key="algo.ddpm.enabled",
        name="ddpm",
        group=1001,
        values=[
            False,
            # True,
        ],
        hidename=True,
    )

    if args.env == "deligrasp":
        # NOTE(review): this branch points at an hdf5 "square" dataset and a controller config
        # update; it looks carried over from another template — confirm it is what is wanted
        # for the deligrasp environment.
        generator.add_param(
            key="train.data",
            name="ds",
            group=2,
            values=[
                [
                    {"path": "~/datasets/square/ph/square_ph_abs_tmp.hdf5"}, # replace with your own path
                ],
            ],
            value_names=[
                "square",
            ],
        )

        # update env config to use absolute action control
        generator.add_param(
            key="experiment.env_meta_update_dict",
            name="",
            group=-1,
            values=[
                {"env_kwargs": {"controller_configs": {"control_delta": False}}}
            ],
        )

        generator.add_param(
            key="train.action_keys",
            name="ac_keys",
            group=-1,
            values=[
                [
                    "action_dict/abs_pos",
                    "action_dict/abs_rot_6d",
                    "action_dict/gripper",
                    # "actions",
                ],
            ],
            value_names=[
                "abs",
            ],
        )

    elif args.env == "droid":
        # Dataset mixture: one entry per experiment in EXP_NAMES; weights and names share a group.
        generator.add_param(
            key="train.sample_weights",
            name="sample_weights",
            group=24988,
            values=[
                EXP_NAMES[k]["sample_weights"] for k in EXP_NAMES.keys()
            ],
        )
        generator.add_param(
            key="train.dataset_names",
            name="dataset_names",
            group=24988,
            values=[
                EXP_NAMES[k]["datasets"] for k in EXP_NAMES.keys()
            ],
            value_names=list(EXP_NAMES.keys())
        )
        # Action keys and their per-key shapes (listed in the same order).
        generator.add_param(
            key="train.action_keys",
            name="ac_keys",
            group=-1,
            values=[
                [
                    "action/abs_pos",
                    "action/abs_rot_6d",
                    "action/gripper_position",
                ],
            ],
            value_names=[
                "abs",
            ],
            hidename=True,
        )
        generator.add_param(
            key="train.action_shapes",
            name="ac_shapes",
            group=-1,
            values=[
                [
                    (1, 3),
                    (1, 6),
                    (1, 1),
                ],
            ],
            value_names=[
                "ac_shapes",
            ],
            hidename=True,
        )
        # Image observations: resolution, camera keys and randomizers.
        generator.add_param(
            key="observation.image_dim",
            name="",
            group=-1,
            values=[
                [128, 128],
            ],
            hidename=True,
        )
        generator.add_param(
            key="observation.modalities.obs.rgb",
            name="cams",
            group=130,
            values=[
                # ["camera/image/hand_camera_left_image"],
                # ["camera/image/hand_camera_left_image", "camera/image/hand_camera_right_image"],
                ["camera/image/varied_camera_1_left_image", "camera/image/varied_camera_2_left_image"],
                # [
                    # "camera/image/hand_camera_left_image", "camera/image/hand_camera_right_image",
                #     "camera/image/varied_camera_1_left_image", "camera/image/varied_camera_1_right_image",
                #     "camera/image/varied_camera_2_left_image", "camera/image/varied_camera_2_right_image",
                # ],
            ],
            value_names=[
                # "wrist",
                # "wrist-stereo",
                "2cams",
                # "3cams-stereo",
            ]
        )
        generator.add_param(
            key="observation.encoder.rgb.obs_randomizer_class",
            name="obsrand",
            group=130,
            values=[
                # "ColorRandomizer", # jitter only
                ["ColorRandomizer", "CropRandomizer"], # jitter, followed by crop
            ],
            hidename=True,
        )
        generator.add_param(
            key="observation.encoder.rgb.obs_randomizer_kwargs",
            name="obsrandargs",
            group=130,
            values=[
                # {}, # jitter only
                [{}, {"crop_height": 116, "crop_width": 116, "num_crops": 1, "pos_enc": False}], # jitter, followed by crop
            ],
            hidename=True,
        )

        ### CONDITIONING
        generator.add_param(
            key="train.goal_mode",
            name="goal_mode",
            group=24986,
            values = [
                # "geom",
                None, # Change this to "geom" to do goal conditioning

            ]
        )
        generator.add_param(
            key="train.truncated_geom_factor",
            name="truncated_geom_factor",
            group=5555,
            values = [
                0.3,
                # 0.5
            ]
        )
        generator.add_param(
            key="observation.modalities.obs.low_dim",
            name="ldkeys",
            group=24986,
            values=[
                ["robot_state/cartesian_position", "robot_state/gripper_position"],
            ],
            value_names=[
                "proprio-lang",
            ],
            hidename=False,
        )
        # Visual encoder configuration.
        generator.add_param(
            key="observation.encoder.rgb.core_kwargs.backbone_kwargs.use_cam",
            name="",
            group=2498,
            values=[
                False,
                # True,
            ],
            hidename=True,
        )
        generator.add_param(
            key="observation.encoder.rgb.core_kwargs.backbone_kwargs.pretrained",
            name="",
            group=2498,
            values=[
                # False,
                True,
            ],
            hidename=True,
        )
        generator.add_param(
            key="observation.encoder.rgb.core_class",
            name="visenc",
            group=-1,
            values=["VisualCore"],
        )
        generator.add_param(
            key="observation.encoder.rgb.core_kwargs.backbone_class",
            name="",
            group=-1,
            values=["ResNet50Conv"],
            hidename=True,
        )
        generator.add_param(
            key="observation.encoder.rgb.core_kwargs.feature_dimension",
            name="visdim",
            group=1234,
            values=[
                512,
                # None,
                # None
            ],
            hidename=True,
        )
        generator.add_param(
            key="observation.encoder.rgb.core_kwargs.flatten",
            name="flatten",
            group=1234,
            values=[
                True,
                # False,
                # False
            ],
            hidename=True,
        )
        generator.add_param(
            key="observation.encoder.rgb.fuser",
            name="fuser",
            group=1234,
            values=[
                None,
                # "transformer",
                # "perceiver"
            ],
            hidename=False,
        )
        generator.add_param(
            key="observation.encoder.rgb.core_kwargs.backbone_kwargs.downsample",
            name="",
            group=1234,
            values=[
                False,
            ],
            hidename=False,
        )

    else:
        # Name the offending value so a mistyped env is easy to spot.
        raise ValueError(
            "Unsupported env {!r}: expected 'deligrasp' or 'droid'".format(args.env)
        )

    # Output directory: <EXP_LOG_PATH>/<env>/<mod>/<algo_name_short>
    generator.add_param(
        key="train.output_dir",
        name="",
        group=-1,
        values=[
            "{exp_log_path}/{env}/{mod}/{algo_name_short}".format(
                exp_log_path=EXP_LOG_PATH,
                env=args.env,
                mod=args.mod,
                algo_name_short=algo_name_short,
            )
        ],
    )

    return generator

# Script entry point: obtain the argument parser from the config_gen helper module, parse the
# arguments, and pass them together with make_generator_helper to make_generator.
# NOTE(review): presumably parse_args() reads sys.argv; inside a Jupyter kernel that holds the
# kernel's own arguments, so this cell is likely meant to be run as a script — confirm.
if __name__ == "__main__":
    parser = get_argparser()

    args = parser.parse_args()
    make_generator(args, make_generator_helper)