In [1]:
#%%
import pickle
import robomimic.utils.torch_utils as TorchUtils
import robomimic.utils.train_utils as TrainUtils
import robomimic.utils.obs_utils as ObsUtils
import robomimic.utils.file_utils as FileUtils

from utils.environment import *
from utils.dataset import *
from utils.functions import *
from utils.rollout import *
from utils.test import *
from typing_extensions import OrderedDict
from torch.utils.data import DataLoader
from robomimic.config import config_factory



## Generate Configuration File

We start from a vanilla behavioral cloning (BC) algorithm and train an ensemble of neural networks (NNs) to select the demonstration data that maximizes the performance of the BC model.

In [4]:
# Define environment and type of demonstration data
env_fname = 'can' # ('can', 'lift')

# Map the dataset folder name to the environment name used to index the rollout logs.
# An unsupported folder name fails here with a clear message instead of leaving
# `envname` undefined and surfacing later as a NameError.
env_name_by_fname = {'can': 'PickPlaceCan', 'lift': 'Lift'}
if env_fname not in env_name_by_fname:
    raise ValueError(
        f"Unsupported env_fname {env_fname!r}; expected one of {sorted(env_name_by_fname)}"
    )
envname = env_name_by_fname[env_fname]

# Types of demonstration data from Robomimic (ph: proficient human, mh: multi-human)
demo_type = 'ph' # ('ph', 'mh')

# Define dataset path
dataset_path = f"datasets/{env_fname}/{demo_type}/low_dim.hdf5"

In [5]:
# Epoch label handed to the rollout evaluation further down the notebook.
eval_epoch = 10

# Build the default BC config and register its observation modalities with ObsUtils.
config = config_factory(algo_name="bc")
ObsUtils.initialize_obs_utils_with_config(config)

# get torch device
device = TorchUtils.get_torch_device(try_to_use_cuda=True)

# Load environment's metadata
env_meta = FileUtils.get_env_metadata_from_dataset(dataset_path=dataset_path)

# Load the observation/action shape metadata for the low-dimensional modalities
low_dim_modalities = config.observation.modalities.obs.low_dim
shape_meta = FileUtils.get_shape_metadata_from_dataset(
    dataset_path=dataset_path,
    all_modalities=low_dim_modalities,
    verbose=True,
)



using obs type: low_dim with modalities: ['robot0_gripper_qpos', 'robot0_eef_quat', 'object', 'robot0_eef_pos']
using obs type: image with modalities: []
obs modality object with shape (14,)
obs modality robot0_eef_pos with shape (3,)
obs modality robot0_eef_quat with shape (4,)
obs modality robot0_gripper_qpos with shape (2,)


In [6]:
# Display the shape recorded in the dataset metadata for each observation modality.
shape_meta['all_shapes']

OrderedDict([('object', (14,)),
             ('robot0_eef_pos', (3,)),
             ('robot0_eef_quat', (4,)),
             ('robot0_gripper_qpos', (2,))])

#### Create Environment

In [7]:
# Build one environment per name listed in the dataset metadata
# (a single entry here), keyed by environment name.
env_names = [env_meta["env_name"]]
envs = OrderedDict()
for env_name in env_names:
    envs[env_name] = create_env_from_metadata(
        env_meta=env_meta,
        env_name=env_name,
    )

# paths to save results
log_dir, ckpt_dir, video_dir = TrainUtils.get_exp_dir(config)

REMOVING


#### Extract Demonstration Data

In [8]:
# Demonstration split: demos [0, num_train_demos) seed the initial training set,
# demos [num_train_demos, num_total_demos) form the pool that samples are later drawn from.
num_train_demos = 90
num_total_demos = 200

# Settings shared by both SequenceDataset instances; only the demo list differs.
sequence_dataset_kwargs = dict(
    hdf5_path = dataset_path,
    obs_keys = config.all_modalities,
    dataset_keys = ("actions", "rewards", "dones"),
    load_next_obs = True,
    frame_stack = 1,
    seq_length = 1, # single-step samples (no multi-step temporal sequences)
    pad_frame_stack = True,
    pad_seq_length = True, # pad last obs per trajectory to ensure all sequences are sampled
    get_pad_mask = False,
    goal_mode = None,
    hdf5_cache_mode = "all", # cache dataset in memory to avoid repeated file i/o
    hdf5_use_swmr = True,
    hdf5_normalize_obs = False,
    filter_by_attribute = None, # can optionally provide a filter key here
)

# Initial training demonstrations
demo_data = SequenceDataset(
    demos_input_list = [f'demo_{i}' for i in range(num_train_demos)],
    **sequence_dataset_kwargs,
)

# splitting training data and rest of the data:
# `dataset_test` is the concatenation of the initial training demos followed by the
# remaining demos, so indices below len(demo_data) address the initial training samples.
# NOTE(review): relies on SequenceDataset supporting `+` (presumably torch's
# Dataset.__add__ -> ConcatDataset) -- confirm against utils.dataset.
dataset_test = demo_data + SequenceDataset(
    demos_input_list = [f'demo_{i}' for i in range(num_train_demos, num_total_demos)],
    **sequence_dataset_kwargs,
)

['demo_0', 'demo_1', 'demo_2', 'demo_3', 'demo_4', 'demo_5', 'demo_6', 'demo_7', 'demo_8', 'demo_9', 'demo_10', 'demo_11', 'demo_12', 'demo_13', 'demo_14', 'demo_15', 'demo_16', 'demo_17', 'demo_18', 'demo_19', 'demo_20', 'demo_21', 'demo_22', 'demo_23', 'demo_24', 'demo_25', 'demo_26', 'demo_27', 'demo_28', 'demo_29', 'demo_30', 'demo_31', 'demo_32', 'demo_33', 'demo_34', 'demo_35', 'demo_36', 'demo_37', 'demo_38', 'demo_39', 'demo_40', 'demo_41', 'demo_42', 'demo_43', 'demo_44', 'demo_45', 'demo_46', 'demo_47', 'demo_48', 'demo_49', 'demo_50', 'demo_51', 'demo_52', 'demo_53', 'demo_54', 'demo_55', 'demo_56', 'demo_57', 'demo_58', 'demo_59', 'demo_60', 'demo_61', 'demo_62', 'demo_63', 'demo_64', 'demo_65', 'demo_66', 'demo_67', 'demo_68', 'demo_69', 'demo_70', 'demo_71', 'demo_72', 'demo_73', 'demo_74', 'demo_75', 'demo_76', 'demo_77', 'demo_78', 'demo_79', 'demo_80', 'demo_81', 'demo_82', 'demo_83', 'demo_84', 'demo_85', 'demo_86', 'demo_87', 'demo_88', 'demo_89']
SequenceDataset: lo

#### Create Train Dataset

In [9]:
# Indices into `dataset_test`: the first len(demo_data) positions are the initial
# training samples; every later position belongs to the pool of remaining samples.
num_initial_samples = len(demo_data)
train_idx = [*range(num_initial_samples)]
left_idx = [*range(num_initial_samples, len(dataset_test))]

# Materialize both subsets as plain lists of samples.
train_dset = [dataset_test[sample_i] for sample_i in train_idx]
left_dset = [dataset_test[sample_i] for sample_i in left_idx]

# Shuffled loader over the training samples: batches of 100, uniform sampling
# (no custom sampler), and the final short batch is dropped.
train_dataloader = DataLoader(
    dataset=train_dset,
    batch_size=100,
    shuffle=True,
    sampler=None,
    num_workers=0,
    drop_last=True,
)

#### Define Ensemble Parameters

In [10]:
# --- Sample selection ---
# target: number of new samples to select per round
# kth: forwarded to FindClosestIdx (presumably the number of nearest neighbours -- confirm)
target, kth = 300, 5
method = 'variance' # ['variance', 'max_error', 'min_error', 'random']

# --- Training / evaluation budget ---
# epoch_num: training epochs per model; episodes_num: rollout episodes per evaluation
epoch_num, episodes_num = 100, 50
# number of models in the ensemble
ensemble_num = 1

# --- Bookkeeping ---
# Two placeholder entries so the training loop can read the last two success rates
# before any evaluation has happened.
scss_rate = [-1] * 2
training_loss, single_scss_rate = [], []
# counter of completed outer rounds
big_iter = 0

## Train

In [11]:
# Active-learning loop. Each round:
#   1. trains a fresh ensemble on the current training set,
#   2. evaluates it with environment rollouts and records the success rate,
#   3. selects new samples from the remaining pool according to `method`,
#   4. moves them into the training set and rebuilds the data loader.

# Fail fast on an unknown selection method: otherwise no samples would ever be
# selected and the loop could not make progress.
if method not in ('variance', 'max_error', 'min_error', 'random'):
    raise ValueError(f"Unknown selection method: {method!r}")

# The loop keeps running while the latest success rate is below 0.96 AND the previous
# one is below 0.9, and there are still unselected samples.
# NOTE(review): as written, the loop stops as soon as EITHER threshold is met --
# confirm this is the intended stopping rule.
while (scss_rate[-1] < 0.96 and scss_rate[-2] < 0.9) and left_idx:
    big_iter += 1

    # Model ensemble, use the basic BC model as the basic version
    model_ensemble = [
        get_example_model(dataset_path=dataset_path, device=device)
        for _ in range(ensemble_num)
    ]

    print('There are ' + str(len(left_idx)) + ' samples left.')
    Ensemble_LOSS = []
    count_model = 0

    print('training size is ' + str(len(train_dataloader)*100))

    # iterator for data_loader - it yields batches (shared by all ensemble members)
    data_loader_iter_lift = iter(train_dataloader)

    for tmp_model_lift in model_ensemble:
        print("\n----------------------------------------")
        print(f'Model No. {count_model}')
        print("----------------------------------------\n")
        count_model += 1

        # record losses
        losses = []
        for epoch in range(epoch_num):
            # one step per full batch of 100 samples
            for i_step in range(1, len(train_dset) // 100 + 1):
                if i_step % 100 == 0 and epoch % 10 == 0:
                    print(f'Epoch {epoch} -> (step = {i_step})')
                try:
                    batch = next(data_loader_iter_lift)
                except StopIteration:
                    # data loader ran out of batches - reset and yield first batch
                    data_loader_iter_lift = iter(train_dataloader)
                    batch = next(data_loader_iter_lift)

                input_batch = tmp_model_lift.process_batch_for_training(batch)
                # pass the actual epoch rather than a hard-coded constant
                info = tmp_model_lift.train_on_batch(batch=input_batch, epoch=epoch, validate=False)

                # record loss
                step_log = tmp_model_lift.log_info(info)
                losses.append(step_log["Loss"])
        Ensemble_LOSS.append(losses.copy())
    training_loss.append(Ensemble_LOSS.copy())

    # evaluate the ensemble based policy
    rollout_model_list = [RolloutPolicyEnsemble(model, obs_normalization_stats=None) for model in model_ensemble]

    all_rollout_logs, video_paths, ob_ac_log = rollout_with_stats(
        policy = rollout_model_list,
        envs = envs,
        horizon = config.experiment.rollout.horizon,
        use_goals = config.use_goals,
        num_episodes = episodes_num,
        render = False,
        video_dir = None, # (None, 'videos')
        epoch = eval_epoch,
        video_skip = config.experiment.get("video_skip", 5),
        terminate_on_success = config.experiment.rollout.terminate_on_success,
    )

    # save success rate after each round
    scss_rate.append(all_rollout_logs[envname]['Success_Rate'])
    print('Succcess rate this round is ' + str(all_rollout_logs[envname]['Success_Rate']))

    # ------------------------------------------------------------------------------------------------
    # Active Sample Selection
    # ------------------------------------------------------------------------------------------------

    closest_idx_list = [] # Selected samples
    if method == 'random':
        # randomly select idx to add
        closest_idx_list = random_index_select(left_idx, target)
    elif method == 'variance':
        # observation keys are taken from the last training batch
        obs_keys = list(batch['obs'].keys())
        # Computes prediction variance
        ob_idx_dict, ob_array, ac_array = obdicts2array(ob_ac_log, obs_keys)
        ac_var = np.var(np.var(ac_array, axis = 1), axis = 1)
        ac_var_list = [(ac_var[i], i) for i in range(len(ac_var))]
        ac_var_list.sort(key = lambda x:x[0])
        # find data points closest to large variance locations
        left_obs_dict = dataset_obs(dataset_test, left_idx, obs_keys)
        # Stop when enough samples are selected OR the rollout observations are
        # exhausted (popping from an empty list would raise IndexError).
        while len(closest_idx_list) < target and ac_var_list:
            # highest remaining variance is at the end of the ascending-sorted list
            _, tmp_idx = ac_var_list.pop()
            tmp_obs = ob_array[tmp_idx]
            closest_idx = FindClosestIdx(left_obs_dict, tmp_obs, closest_idx_list, kth)
            for j in closest_idx:
                # The list stores j[1], so membership must be tested on j[1]
                # (testing `j` itself never matches and lets duplicates through).
                if j[1] not in closest_idx_list:
                    closest_idx_list.append(j[1])
    elif method == 'max_error':
        # select those with largest test error
        prediction_error = prediction_error_compute(dataset_test, rollout_model_list)
        closest_idx_list = error_idx_selection(prediction_error, target, train_idx, 'max')
    elif method == 'min_error':
        # select those with smallest test error
        prediction_error = prediction_error_compute(dataset_test, rollout_model_list)
        closest_idx_list = error_idx_selection(prediction_error, target, train_idx, 'min')

    # update training dataset
    UpdateTrain(train_idx, left_idx, closest_idx_list)

    train_dset = [dataset_test[idx] for idx in train_idx]
    train_dataloader = DataLoader(
        dataset = train_dset ,
        sampler = None,       # no custom sampling logic (uniform sampling)
        batch_size = 100,     # batches of size 100
        shuffle = True,
        num_workers = 0,
        drop_last = True      # don't provide last batch in dataset pass if it's less than 100 in size
    )




using obs type: low_dim with modalities: ['robot0_gripper_qpos', 'robot0_eef_quat', 'object', 'robot0_eef_pos']
using obs type: image with modalities: []


using obs type: low_dim with modalities: ['robot0_gripper_qpos', 'robot0_eef_quat', 'object', 'robot0_eef_pos']
using obs type: image with modalities: []


using obs type: low_dim with modalities: ['robot0_gripper_qpos', 'robot0_eef_quat', 'object', 'robot0_eef_pos']
using obs type: image with modalities: []
There are 13008 samples left.
training size is 10100

----------------------------------------
Model No. 0
----------------------------------------

Epoch 0 -> (step = 100)
Epoch 10 -> (step = 100)
Epoch 20 -> (step = 100)
Epoch 30 -> (step = 100)
Epoch 40 -> (step = 100)
Epoch 50 -> (step = 100)
Epoch 60 -> (step = 100)
Epoch 70 -> (step = 100)
Epoch 80 -> (step = 100)
Epoch 90 -> (step = 100)

----------------------------------------
Model No. 1
----------------------------------------

Epoch 0 -> (step = 100)
Epoch 10 -> (s

## Test

In [None]:
# Evaluate the trained ensemble on every sample of `dataset_test`
# (initial training demos followed by the remaining demos).
# Candidate observation keys; only those present in a sample are fed to the policies.
obs_name = ['object', 'robot0_joint_pos', 'robot0_joint_pos_cos', 'robot0_joint_pos_sin', \
           'robot0_joint_vel', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', \
            'robot0_gripper_qvel']

prediction_var = []    # per-sample variance of the predictions across ensemble members
prediction_mean = []   # per-sample mean of the predictions across ensemble members
labels = []            # per-sample demonstrated action
prediction_error = []  # per-sample (demonstrated action - ensemble mean prediction)

for case_i in range(len(dataset_test)):
    # fetch the sample once instead of re-indexing the dataset for every field
    sample = dataset_test[case_i]
    sample_obs = sample['obs']
    tmp_ob_dict = {key: sample_obs[key] for key in obs_name if key in sample_obs}

    # label the action
    labels.append(sample['actions'])

    # one prediction per ensemble member, stacked along axis 0
    tmp_prediction_list = np.array(
        [policy(ob=tmp_ob_dict, goal=None) for policy in rollout_model_list]
    )

    ensemble_mean = tmp_prediction_list.mean(axis = 0)
    prediction_mean.append(ensemble_mean)
    prediction_var.append(tmp_prediction_list.var(axis = 0))
    # error is measured against the ensemble MEAN prediction (not its variance)
    prediction_error.append(sample['actions'] - ensemble_mean)

# error without normalizing
error_rank = []
var_rank = []
for error_idx in range(len(prediction_error)):
    error_rank.append((error_idx, np.linalg.norm(prediction_error[error_idx])))
    var_rank.append((error_idx, np.mean(prediction_var[error_idx])))

# rank according to error and var (ascending), then keep the indices of the
# `top_k` samples with the largest error / variance
top_k = 5000
error_rank.sort(key = lambda x:x[1])
error_idx_list = [ele[0] for ele in error_rank[-top_k:]]
var_rank.sort(key = lambda x:x[1])
var_idx_list = [ele[0] for ele in var_rank[-top_k:]]

### Save Results for Plots

In [None]:
# Pickle the per-round success rates and the training losses into two files whose
# names encode the environment, demo type, ensemble size and selection method.
# (The files carry a .txt suffix but contain binary pickle data.)
results_to_save = {
    # Success rate
    f"{envname}_{demo_type}_scss_ensemble_{ensemble_num}_{method}.txt": scss_rate,
    # Training error
    f"{envname}_{demo_type}_trainloss_ensemble_{ensemble_num}_{method}.txt": training_loss,
}
for out_path, payload in results_to_save.items():
    with open(out_path, "wb") as fp:
        pickle.dump(payload, fp)