In [7]:
import argparse
import json
import h5py
import imageio
import numpy as np
import os
from copy import deepcopy
import glob

import torch
import matplotlib.pyplot as plt

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.torch_utils as TorchUtils
import robomimic.utils.tensor_utils as TensorUtils
import robomimic.utils.obs_utils as ObsUtils
from robomimic.envs.env_base import EnvBase
from robomimic.algo import RolloutPolicy

import urllib.request
# Switch the kernel's working directory to the robomimic checkout so that the
# relative paths used later in the notebook (e.g. "cluster_output/") resolve there.
%cd /home/omniverse/workspace/robomimic/

# Point LD_LIBRARY_PATH at the CUDA 12.1, mujoco210, NVIDIA and CoppeliaSim
# library directories of this machine.
# NOTE(review): this assignment replaces any inherited value instead of extending
# it, and it runs after the imports above - confirm that the libraries which need
# it read the variable at use time rather than at import time.
import os
# Debug helpers for inspecting the environment before overriding it:
# print(os.environ['LD_LIBRARY_PATH'])
# print(list(os.environ))
os.environ['LD_LIBRARY_PATH'] = '/usr/local/cuda-12.1/lib64:/home/omniverse/.mujoco/mujoco210/bin:/usr/lib/nvidia:/home/omniverse/workspace/PyRep/CoppeliaSim_Edu_V4_1_0_Ubuntu20_04'


/home/omniverse/workspace/robomimic


In [8]:
def get_best_model(results_paths, models_path):
    """
    Select the entry of ``models_path`` that matches the best evaluation row.

    Loads ``<results_paths>/agent_results/success_rates.npy``, averages it over
    axis 1, plots the averaged values, prints a short summary and returns the
    element of ``models_path`` at the index of the highest average.

    Args:
        results_paths (str): directory that contains the ``agent_results`` folder
        models_path (sequence): candidate models, indexed like the rows of the
            success-rate array. NOTE(review): the caller is responsible for the
            ordering - a plain lexicographic sort of names such as
            ``model_epoch_<N>.pth`` is not a numeric ordering.
    Returns:
        the element of ``models_path`` at the best row index
    Raises:
        IndexError: if the best row index is outside ``models_path``
    """
    success_rate = np.load(os.path.join(results_paths, "agent_results", "success_rates.npy"))

    # average once and reuse it for both the selection and the plot
    mean_success_rate = success_rate.mean(axis=1)
    best_model_idx = np.argmax(mean_success_rate)

    # labelled figure so the plot can be read on its own
    fig, ax = plt.subplots()
    ax.plot(mean_success_rate)
    ax.set_xlabel("model index")
    ax.set_ylabel("mean success rate")
    ax.set_title("Mean success rate per model")
    plt.show()

    print("Best model idx: ", best_model_idx)
    print("Best model success rate: ", success_rate[best_model_idx].mean())
    print("Length of models_path: ", len(models_path))

    # fail with an explanation instead of a bare "list index out of range"
    if best_model_idx >= len(models_path):
        raise IndexError(
            "best model index {} is out of range for models_path of length {}".format(
                best_model_idx, len(models_path)
            )
        )
    return models_path[best_model_idx]

def get_best_model_idx(results_paths):
    """
    Return the row index with the highest mean success rate.

    Reads ``<results_paths>/agent_results/success_rates.npy`` and averages it
    over axis 1; the position of the largest average is returned (the first
    one when several rows tie).

    Args:
        results_paths (str): directory that contains the ``agent_results`` folder
    Returns:
        index of the row with the highest axis-1 mean
    """
    rates_file = os.path.join(results_paths, "agent_results", "success_rates.npy")
    per_row_mean = np.mean(np.load(rates_file), axis=1)
    return per_row_mean.argmax()


def rollout(policy, env, horizon, render=False, video_writer=None, video_skip=5, camera_names=None):
    """
    Helper function to carry out a single rollout. Supports on-screen rendering and
    off-screen rendering to a video, and returns summary statistics for the rollout.
    Args:
        policy (instance of RolloutPolicy): policy loaded from a checkpoint
        env (instance of EnvBase): env loaded from a checkpoint or demonstration metadata
        horizon (int): maximum horizon for the rollout
        render (bool): whether to render rollout on-screen
        video_writer (imageio writer): if provided, use to write rollout to video
        video_skip (int): how often to write video frames
        camera_names (list): determines which camera(s) are used for rendering. Pass more than
            one to output a video with multiple camera views concatenated horizontally.
            Required (non-empty) whenever @render is set or @video_writer is provided.
    Returns:
        stats (dict): some statistics for the rollout - "Return" (summed reward),
            "Horizon" (number of steps started) and "Success_Rate" (task success as a float)
    """
    assert isinstance(env, EnvBase)
    assert isinstance(policy, RolloutPolicy)
    assert not (render and (video_writer is not None))
    # both rendering paths index / iterate camera_names, so reject a missing list up front
    if render or (video_writer is not None):
        assert camera_names, "camera_names must be a non-empty list when rendering or writing video"

    policy.start_episode()
    obs = env.reset()
    state_dict = env.get_state()

    # hack that is necessary for robosuite tasks for deterministic action playback
    obs = env.reset_to(state_dict)

    video_count = 0  # video frame counter
    total_reward = 0.
    # defined before the loop so the stats below are valid even when the loop body
    # never runs (horizon == 0) or a rollout exception is raised during the first step
    success = False
    num_steps = 0
    try:
        for step_i in range(horizon):
            # counts every step that was started, including one cut short by an exception
            num_steps = step_i + 1

            # get action from policy
            act = policy(ob=obs)

            # play action
            next_obs, r, done, _ = env.step(act)

            # compute reward
            total_reward += r
            success = env.is_success()["task"]

            # visualization
            if render:
                env.render(mode="human", camera_name=camera_names[0])
            if video_writer is not None:
                if video_count % video_skip == 0:
                    video_img = []
                    for cam_name in camera_names:
                        video_img.append(env.render(mode="rgb_array", height=512, width=512, camera_name=cam_name))
                    video_img = np.concatenate(video_img, axis=1) # concatenate horizontally
                    video_writer.append_data(video_img)
                video_count += 1

            # break if done or if success
            if done or success:
                break

            # update for next iter
            obs = deepcopy(next_obs)

    except env.rollout_exceptions as e:
        print("WARNING: got rollout exception {}".format(e))

    stats = dict(Return=total_reward, Horizon=num_steps, Success_Rate=float(success))

    return stats

    

In [35]:
# Root directory of the training outputs, relative to the working directory.
results_path = "cluster_output/"

# Experiment to evaluate.
model = "mrl"
dataset = "ph"
task = "square"

# Experiment directory: <root>/<model>_trained_models_ds/<model>_<task>_<dataset>
results_path = results_path + f"{model}_trained_models_ds/{model}_{task}_{dataset}"
results_paths = sorted(glob.glob(results_path + "/*"))
print(results_paths)

# "agent_results" is the folder success_rates.npy is read from, so it is skipped
# explicitly when picking the run directory instead of relying on sort order.
run_dirs = [p for p in results_paths if os.path.basename(p) != "agent_results"]
if not run_dirs:
    raise FileNotFoundError(f"No run directory found under '{results_path}'")
run_dir = run_dirs[0]
models_path = sorted(glob.glob(run_dir + "/models/*"))

# Number of epochs between two saved checkpoints.
# NOTE(review): assumes row i of success_rates.npy belongs to the checkpoint saved
# at epoch steps * (i + 1) - confirm against the training config's save interval.
steps = 50
ckpt_path_idx = get_best_model_idx(results_path)
ckpt_path = run_dir + f"/models/model_epoch_{steps*(1+ckpt_path_idx)}.pth"
print(ckpt_path)
if not os.path.isfile(ckpt_path):
    raise FileNotFoundError(f"Checkpoint not found: '{ckpt_path}'")
device = TorchUtils.get_torch_device(try_to_use_cuda=True)

# restore policy
policy, ckpt_dict = FileUtils.policy_from_checkpoint(ckpt_path=ckpt_path, device=device, verbose=True)

['cluster_output/mrl_trained_models_ds/mrl_square_ph/20240913173310', 'cluster_output/mrl_trained_models_ds/mrl_square_ph/agent_results']
cluster_output/mrl_trained_models_ds/mrl_square_ph/20240913173310/models/model_epoch_200.pth
{
    "algo_name": "mrl",
    "experiment": {
        "name": "mrl_square_ph",
        "validate": false,
        "logging": {
            "terminal_output_to_txt": true,
            "log_tb": false,
            "log_wandb": true,
            "wandb_proj_name": "MetricRL_Baselines"
        },
        "save": {
            "enabled": true,
            "every_n_seconds": null,
            "every_n_epochs": 50,
            "epochs": [],
            "on_best_validation": false,
            "on_best_rollout_return": false,
            "on_best_rollout_success_rate": true
        },
        "epoch_every_n_steps": 100,
        "validation_epoch_every_n_steps": 10,
        "env": null,
        "additional_envs": null,
        "render": false,
        "render_video": 

In [36]:



# Build the environment from the restored checkpoint. On-screen rendering is
# disabled inside the notebook; off-screen rendering is enabled so that RGB
# frames can be produced for the video.
env_render_kwargs = dict(render=False, render_offscreen=True, verbose=True)
env, _ = FileUtils.env_from_checkpoint(ckpt_dict=ckpt_dict, **env_render_kwargs)

INFO:root:Device 0 is available for rendering
INFO:root:Device 1 is available for rendering
INFO:root:Command '['/home/omniverse/miniforge3/envs/robomimic/lib/python3.8/site-packages/egl_probe/build/test_device', '3']' returned non-zero exit status 1.
INFO:root:Device 3 is not available for rendering
INFO:root:Command '['/home/omniverse/miniforge3/envs/robomimic/lib/python3.8/site-packages/egl_probe/build/test_device', '4']' returned non-zero exit status 1.
INFO:root:Device 4 is not available for rendering


Loaded dataset 'object' with shape (14,)
Loaded dataset 'robot0_eef_pos' with shape (3,)
Loaded dataset 'robot0_eef_quat' with shape (4,)
Loaded dataset 'robot0_eef_vel_ang' with shape (3,)
Loaded dataset 'robot0_eef_vel_lin' with shape (3,)
Loaded dataset 'robot0_gripper_qpos' with shape (2,)
Loaded dataset 'robot0_gripper_qvel' with shape (2,)
Loaded dataset 'robot0_joint_pos' with shape (7,)
Loaded dataset 'robot0_joint_pos_cos' with shape (7,)
Loaded dataset 'robot0_joint_pos_sin' with shape (7,)
Loaded dataset 'robot0_joint_vel' with shape (7,)
Created environment with name NutAssemblySquare
Action size is 7
NutAssemblySquare
{
    "camera_depths": false,
    "camera_heights": 84,
    "camera_widths": 84,
    "control_freq": 20,
    "controller_configs": {
        "control_delta": true,
        "damping": 1,
        "damping_limits": [
            0,
            10
        ],
        "impedance_mode": "fixed",
        "input_max": 1,
        "input_min": -1,
        "interpolation

In [38]:
# Seed numpy and torch before the rollout.
seed = 1
np.random.seed(seed)
torch.manual_seed(seed)

# The "transport" task gets a longer horizon than the other tasks.
rollout_horizon = 1000 if task == "transport" else 500

video_path = "rollout.mp4"
video_writer = imageio.get_writer(video_path, fps=20)
try:
    stats = rollout(
        policy=policy,
        env=env,
        horizon=rollout_horizon,
        render=False,
        video_writer=video_writer,
        video_skip=1,
        camera_names=["agentview"],
    )
finally:
    # always close the writer, even if the rollout raises, so it is not left open
    video_writer.close()
print(stats)

{'Return': 1.0, 'Horizon': 181, 'Success_Rate': 1.0}


In [29]:
# Display the video stored at video_path (the file the rollout cell writes to)
# as the output of this cell.
from IPython.display import Video
Video(video_path)