## Installation and Imports

In [1]:
!pwd

/workspace/darm-mujoco/darm_training


In [2]:
# Configure env variables

# TODO: change path
import os
os.environ["DARM_MUJOCO_PATH"] = "/workspace/darm-mujoco"
os.getenv('DARM_MUJOCO_PATH')

'/workspace/darm-mujoco'

In [3]:
# Check if GCC is installed
!gcc --version

gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [4]:
# Install GCC if absent
!sudo apt update
!sudo apt install build-essential -y

[sudo] password for daniel: 
[sudo] password for daniel: 


In [None]:
# # Setup Mujoco for gym - If needed
# !apt-get install -y \
#     libgl1-mesa-dev \
#     libgl1-mesa-glx \
#     libglew-dev \
#     libosmesa6-dev \
#     software-properties-common

# !apt-get install -y patchelf

# !pip install gym

# !pip install free-mujoco-py

# import mujoco_py
# import gym

In [3]:
!pip install ray[rllib] torch
!pip install wandb
!pip install tensorflow_probability



In [4]:
!python setup.py install

/home/daniel/DARM/darm_mujoco
running install
running bdist_egg
running egg_info
writing darm_gym_env.egg-info/PKG-INFO
writing dependency_links to darm_gym_env.egg-info/dependency_links.txt
writing requirements to darm_gym_env.egg-info/requires.txt
writing top-level names to darm_gym_env.egg-info/top_level.txt
reading manifest file 'darm_gym_env.egg-info/SOURCES.txt'
writing manifest file 'darm_gym_env.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build/bdist.linux-x86_64/egg
creating build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/darm_sf_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/__init__.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/multi_darm_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/darm_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_e

In [4]:
# Check if mujoco import is successful
import mujoco

In [None]:
# If mujoco import fails, update pandas and restart runtime
!pip install pandas -U

In [None]:
# # If GLFW is absent
# %%bash
# sudo apt-get install libglfw3 -y
# sudo apt-get install libglfw3-dev -y
# pip install --user glfw

In [1]:
import ray
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback

import os
import gym
from darm_gym_env import DARMEnv

### TODO:
    - Change single_finger=False (In env register and config)
    - Change run_local_dir
    - Change run name, tags, and notes

## Register Environment with RLlib

In [2]:
def make_env(env_config):
    env = gym.wrappers.TimeLimit(env=DARMEnv(digits=["ii"],
                                             start_state_file="DARMHand_SF_start_state.npy"),
                                 max_episode_steps=200)
    # env = gym.wrappers.TransformObservation(env, lambda obs: obs*100)
    return env

env_creator = lambda env_config: make_env(env_config)
register_env("darm/DarmHand-v0", env_creator)

## Configure and Run

In [3]:
# TODO:
# change: rollout_workers
# change: num_envs_per_worker
# change: gpu

config = (
    PPOConfig()
    .environment(
        env="darm/DarmHand-v0"
    )
    .training(
        gamma=0.995,
        lambda_=0.95,
        clip_param=0.2,
        kl_coeff=1.0,
        num_sgd_iter=20,
        lr=0.0001,
        sgd_minibatch_size=32768,
        train_batch_size=320000,
        model={
            'fcnet_hiddens': [64, 256, 256, 64], # [32, 256, 256, 64],
            'fcnet_activation': 'relu'
        }
        # model  //={"free_log_std": true}, {"use_lstm": True},
    )
    .rollouts(
        num_rollout_workers=3,#121,
        num_envs_per_worker=4,
        # rollout_fragment_length=1,
        recreate_failed_workers=True,
        num_consecutive_worker_failures_tolerance=10,
        restart_failed_sub_environments=True,
        batch_mode="complete_episodes",     # watch out
        observation_filter="MeanStdFilter"  # watch out
    )
    .resources(num_gpus=0)#1)
    # .evaluation(evaluation_interval=100) # For 1000 timesteps iter; 100 evals
    .framework(framework="torch")
)
# config.to_dict()

In [4]:
# TODO:
# change: run name
# change: notes
# change: tags
# change: wandb config

env_tag = "dii"
run_name = f"RLlib_SAC_{env_tag}_position"

notes = """
- The environment was updated such that the target is within a range from the start point
- Velocity penalty was removed and only effort penalty was used
- The reward function was updated according to the reach task reward used in facebookresearch/myosuite [https://github.com/facebookresearch/myosuite/blob/main/myosuite/envs/myo/reach_v0.py]
- The done signal is trigerred only when the fingertip goes beyond a threshold. The episode continues to the maximum timestep otherwise.
- The friction and damping coefficient of the environment is updated. Values are inspired from Deepmind's Mujoco Menagerie [https://github.com/deepmind/mujoco_menagerie/blob/main/shadow_hand/right_hand.xml]
- The range of action from the model was changed to [-1, 1]. This action is mapped to the actual action sent to mujoco e.g [0, 2]]. This change is inspired from values used in OpenAI's Gym Mujoco environments.
- max_episode_steps was updated to 200.
- Velocity vector (size [3,]) was added to observation. Observation size is now (9,)
- Action range was increased to [0, 5]
<Changes: ID 3>
- Observation warpper to scale observation from m and m/s to cm and cm/s was applied
<Changes: ID 4>
- Max Tension for Digitorum Extensor Communis was increased to 10
- FIXED: Velocity Observation from (prev_pos - new_pos)/time to (new_pos - prev_pos)/time
- FIXED: Removed weight of 1 from 'sparse', 'solved', and 'done' in reward weighting
- Reduced max_target_th to 5*0.004 m. I.e. 20 mm
- Increased the number of envs to 24 to experiment with scaling the training
<Changes: ID 5>
- Updated Env Definition
    - Updated observation space. Increased observation of the state
    including target pose (7,); kinematic chain (12,) or (9,) for digit I;
    velocity (3,); and contacts with other fingers and the palm (6,)
    - Target is now specified as position and orientation of fingertip
    - Reward function now includes penalty terms for the angular
    displacement and contact with other fingers
    - Action space is still the continuous torque value applied to each
    tendon
    - Distance parameters passed into the environment and returned from the
    environment are now all specified in cm.
    - Environment modified in such a way that any combination of the 5
    digits can be used
    - Start states file now needs to be saved in the start_states sub-folder
    of the darm_gym_env directory, and passed as a parameter when creating
    the environment. This alows for the dynamics of combining different
    digits.
<Changes: ID 6>
- New Position Servo Environment
- Changed the action space to gym.spaces.MultiBinary

- Digit II; No Wrist Environment
- This run was trained on vast_ai using RLlib's SAC algorithm.
"""

tags = ["digit_ii", "ppo", "rllib", "vast_ai", "position_servo"]



wandb_init = dict(
    save_code=True,
    resume=True,
    config={
        "env": config.env,
        "num_rollout_workers": config.num_rollout_workers,
        "num_envs_per_worker": config.num_envs_per_worker,
        "recreate_failed_workers": config.recreate_failed_workers,
        "num_consecutive_worker_failures_tolerance": config.num_consecutive_worker_failures_tolerance,
        "restart_failed_sub_environments": config.restart_failed_sub_environments,
        "num_gpus": config.num_gpus,
        "framework": config.framework_str,
        
        "stop_episode_reward_mean": 1_300,
        "run_local_dir": f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results/{env_tag}",
        
        "checkpoint_at_end": True,
        "checkpoint_score_attribute": "episode_reward_mean",  # or leave to save last chkpts
        "checkpoint_score_order": "max",
        "checkpoint_frequency": 50,   # iterations
        "num_checkpoints_to_keep": 3,
        "save_checkpoints_to_wandb": True
    },
    tags=tags,
    notes=notes,
    name=run_name
)

In [5]:
!pwd

/home/daniel/DARM/darm_mujoco/darm_training


## Run

In [6]:
# Train

sync_config = tune.SyncConfig()

tuner = tune.Tuner(
    "PPO",
    param_space=config.to_dict(),
    run_config=air.RunConfig(
        name=run_name,
        sync_config=sync_config,
        stop={"episode_reward_mean": wandb_init["config"]["stop_episode_reward_mean"]},
        
        local_dir=wandb_init["config"]["run_local_dir"],
        checkpoint_config=air.CheckpointConfig(
            checkpoint_at_end = wandb_init["config"]["checkpoint_at_end"],
            checkpoint_score_attribute = wandb_init["config"]["checkpoint_score_attribute"],  # or leave to save last chkpts
            checkpoint_score_order = wandb_init["config"]["checkpoint_score_order"],
            checkpoint_frequency = wandb_init["config"]["checkpoint_frequency"],
            num_to_keep = wandb_init["config"]["num_checkpoints_to_keep"]
        ),
        callbacks=[
                WandbLoggerCallback(project="DARM", 
                                    api_key="392c8a47eb0658eb5c71190757a69110e2140f4a",
                                    save_checkpoints=wandb_init["config"]["save_checkpoints_to_wandb"], 
                                    **wandb_init)
            ],
        )
)

results = tuner.fit()

2023-04-09 00:36:46,420	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-04-09 00:37:09
Running for:,00:00:21.37
Memory:,5.5/7.5 GiB

Trial name,# failures,error file
PPO_darm_DarmHand-v0_3ac57_00000,1,/home/daniel/DARM/darm_mujoco/darm_training/results/dii/RLlib_SAC_dii_position/PPO_darm_DarmHand-v0_3ac57_00000_0_2023-04-09_00-36-48/error.txt

Trial name,status,loc
PPO_darm_DarmHand-v0_3ac57_00000,ERROR,


[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(PPO pid=11183)[0m 2023-04-09 00:36:52,433	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m [2023-04-09 00:36:56,400 E 10923 10968] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-04-09_00-36-44_370111_10817 is over 95% full, available space: 208240640; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(RolloutWorker pid=11322)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=11322)[0m Number of tendon position actuators: 5
[2m[36m(RolloutWorker pid=11320)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=11322)[0m 2023-04-09 00:37:00,557	ERROR worker.py:763 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=11322, ip=192.168.39.35, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fa2493258b0>)
[2m[36m(RolloutWorker pid=11322)[0m   File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 712, in __init__
[2m[36m(RolloutWorker pid=11322)[0m     self._build_policy_map(
[2m[36m(RolloutWorker pid=11322)[0m   File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1970, in _build_policy_map
[2m[36m(RolloutWorker pid=11322)[0m     self.policy_map.create_policy(
[2m[36m(RolloutWorker pid=11322)[0m   File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/rllib/policy/policy_map.py", line 146, in create_policy
[2m[36m(RolloutWorker pid=

[2m[36m(RolloutWorker pid=11320)[0m Number of tendon position actuators: 5
[2m[36m(RolloutWorker pid=11321)[0m Loaded XML file successfully


2023-04-09 00:37:00,894	ERROR trial_runner.py:1088 -- Trial PPO_darm_DarmHand-v0_3ac57_00000: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/tune/execution/ray_trial_executor.py", line 1070, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/_private/worker.py", line 2311, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::PPO.__init__()[39m (pid=11183, ip=192.168.39.35, repr=PPO)
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 239, in _setup
    self.add_workers(
  File "/home/daniel/miniconda3/lib/python3.8/s

[2m[36m(RolloutWorker pid=11321)[0m Number of tendon position actuators: 5


[2m[33m(raylet)[0m [2023-04-09 00:37:06,409 E 10923 10968] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-04-09_00-36-44_370111_10817 is over 95% full, available space: 207908864; capacity: 31845081088. Object creation will fail if spilling is required.


Trial name,trial_id
PPO_darm_DarmHand-v0_3ac57_00000,3ac57_00000


2023-04-09 00:37:09,340	ERROR ray_trial_executor.py:118 -- An exception occurred when trying to stop the Ray actor:Traceback (most recent call last):
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/tune/execution/ray_trial_executor.py", line 109, in _post_stop_cleanup
    ray.get(future, timeout=timeout)
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/_private/worker.py", line 2311, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::PPO.__init__()[39m (pid=11183, ip=192.168.39.35, repr=PPO)
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 239, in _setup
    self.add_workers(
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 612, 

In [59]:
# Ensure wandb is sysncing to cloud
# cd to darm_training again if not

'/home/daniel/DARM/darm_mujoco/darm_training/results/darm_sf_hand/test1_SF_RLlib_PPO'

[2m[33m(raylet)[0m [2023-02-22 11:42:45,948 E 5245 5295] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-22_11-21-52_053720_4034 is over 95% full, available space: 707887104; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-22 11:42:55,966 E 5245 5295] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-22_11-21-52_053720_4034 is over 95% full, available space: 707854336; capacity: 31845081088. Object creation will fail if spilling is required.


In [6]:
# TODO:
# change: experiment name

# Restore Interrupted run
tuner = tune.Tuner.restore(
    f"{wandb_init['config']['run_local_dir']}/{run_name}",
    resume_errored=True
)
tuner

<ray.tune.tuner.Tuner at 0x7f58802e9d00>

In [7]:
results = tuner.get_results()
results

<ray.tune.result_grid.ResultGrid at 0x7f5880280b20>

In [8]:
# Get the best result based on a particular metric.
best_result = results.get_best_result(metric="episode_reward_mean", mode="max")
best_result

The requested checkpoint is not available on this node, most likely because you are using Ray client or disabled checkpoint synchronization. To avoid this, enable checkpoint synchronization to cloud storage by specifying a `SyncConfig`. The checkpoint may be available on a different node - please check this location on worker nodes: /workspace/darm-mujoco/darm_training/results/SF_rllib_es_vast_ai_rew4/ES_darm_DarmSFHand-v0_e4270_00000_0_2023-02-14_22-57-01/checkpoint_000710


Result(metrics={'episode_reward_mean': 24.031534, 'episode_len_mean': 100.0, 'timesteps_this_iter': 115260, 'info': {'weights_norm': 594.49335, 'grad_norm': 4.660049, 'update_ratio': 0.024672424, 'episodes_this_iter': 1234, 'episodes_so_far': 745896}, 'done': False, 'trial_id': 'e4270_00000', 'perf': {'cpu_util_percent': 60.725, 'ram_util_percent': 19.5}, 'experiment_tag': '0'}, error=None, log_dir=PosixPath('/home/daniel/DARM/darm_mujoco/darm_training/results/SF_rllib_es_vast_ai_rew4/ES_darm_DarmSFHand-v0_e4270_00000_0_2023-02-14_22-57-01'))

In [9]:
# Get the best checkpoint corresponding to the best result.
best_checkpoint = best_result.checkpoint
best_checkpoint
# best_checkpoint = "/home/daniel/DARM/darm_mujoco/darm_training/results/SF_rllib_es_vast_ai/ES_darm_DarmSFHand-v0_ba596_00000_0_2023-02-14_00-30-05/checkpoint_000100"

In [11]:
best_checkpoint._local_path

'/home/daniel/DARM/darm_mujoco/darm_training/results/SF_rllib_es_vast_ai_rew3/ES_darm_DarmSFHand-v0_37337_00000_0_2023-02-14_15-35-32/checkpoint_000200'

In [10]:
# Get Algorithm from saved checkpoint
# from ray.rllib.algorithms.algorithm import Algorithm
# algo = Algorithm.from_checkpoint(best_checkpoint._local_path)
# algo

algo = config.build()
algo.restore(best_checkpoint._local_path)

Loaded XML file successfully


  logger.warn(
2023-02-15 02:08:05,175	INFO worker.py:1538 -- Started a local Ray instance.
[2m[33m(raylet)[0m [2023-02-15 02:08:14,135 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 145473536; capacity: 31845081088. Object creation will fail if spilling is required.
2023-02-15 02:08:18,054	INFO es.py:401 -- Creating actors.
2023-02-15 02:08:18,082	INFO trainable.py:172 -- Trainable.setup took 16.976 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2023-02-15 02:08:18,103	INFO filter_manager.py:34 -- Synchronizing filters ...
[2m[36m(Worker pid=27624)[0m   logger.warn(
[2m[36m(Worker pid=27624)[0m 2023-02-15 02:08:23,178	INFO policy.py:1147 -- Policy (worker=local) running on CPU.
[2m[36m(Worker pid=27624)[0m 2023-02-15 02:08:23,178	INFO torch_policy.py:184 -- Found 0 visible cuda devices.


[2m[36m(Worker pid=27624)[0m Loaded XML file successfully


[2m[36m(Worker pid=27623)[0m   logger.warn(
[2m[36m(Worker pid=27623)[0m 2023-02-15 02:08:23,736	INFO policy.py:1147 -- Policy (worker=local) running on CPU.
[2m[36m(Worker pid=27623)[0m 2023-02-15 02:08:23,736	INFO torch_policy.py:184 -- Found 0 visible cuda devices.
2023-02-15 02:08:23,959	INFO filter_manager.py:55 -- Updating remote filters ...
2023-02-15 02:08:23,969	INFO trainable.py:790 -- Restored on 127.0.1.1 from checkpoint: /home/daniel/DARM/darm_mujoco/darm_training/results/SF_rllib_es_vast_ai_rew4/ES_darm_DarmSFHand-v0_e4270_00000_0_2023-02-14_22-57-01/checkpoint_000270
2023-02-15 02:08:23,970	INFO trainable.py:799 -- Current state after restoring: {'_iteration': 270, '_timesteps_total': 24334332, '_time_total': 916.8663566112518, '_episodes_total': None}


[2m[36m(Worker pid=27623)[0m Loaded XML file successfully


[2m[36m(Worker pid=27625)[0m   logger.warn(
[2m[36m(Worker pid=27625)[0m 2023-02-15 02:08:23,946	INFO policy.py:1147 -- Policy (worker=local) running on CPU.
[2m[36m(Worker pid=27625)[0m 2023-02-15 02:08:23,947	INFO torch_policy.py:184 -- Found 0 visible cuda devices.


[2m[36m(Worker pid=27625)[0m Loaded XML file successfully


[2m[33m(raylet)[0m [2023-02-15 02:08:24,140 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 145403904; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-15 02:08:34,157 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 145403904; capacity: 31845081088. Object creation will fail if spilling is required.


In [11]:
# resume the interrupted run
tuner.fit()

2023-02-11 17:26:26,421	INFO trial_runner.py:688 -- A local experiment checkpoint was found and will be used to restore the previous experiment state.
2023-02-11 17:26:26,422	INFO trial_runner.py:825 -- Using following checkpoint to resume: /home/daniel/DARM/darm_mujoco/darm_training/results/Test_DARMSF_DELTA_TARGET/experiment_state-2023-02-11_17-23-28.json
2023-02-11 17:26:26,440	INFO tune.py:653 -- TrialRunner resumed, ignoring new add_experiment but updating trial resources.


0,1
Current time:,2023-02-11 17:29:20
Running for:,00:02:54.01
Memory:,6.3/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_darm_DarmSFHand-v0_6a944_00000,RUNNING,192.168.152.36:15703,13,227.817,13026,-179.588,-166.097,-189.684,100


[2m[33m(raylet)[0m [2023-02-11 17:26:26,618 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061683200; capacity: 31845081088. Object creation will fail if spilling is required.
[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:31,413	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[33m(raylet)[0m [2023-02-11 17:26:36,627 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061359616; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(RolloutWorker pid=15846)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=15844)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=15846)[0m   logger.warn(
[2m[36m(RolloutWorker pid=15844)[0m   logger.warn(


[2m[36m(RolloutWorker pid=15845)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=15845)[0m   logger.warn(
[2m[36m(SAC pid=15703)[0m   logger.warn(


[2m[36m(SAC pid=15703)[0m Loaded XML file successfully


[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:40,442	INFO trainable.py:790 -- Restored on 192.168.152.36 from checkpoint: /tmp/checkpoint_tmp_7f50b6e15e2c473dba807bf1d398566d
[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:40,442	INFO trainable.py:799 -- Current state after restoring: {'_iteration': 11, '_timesteps_total': None, '_time_total': 113.04964661598206, '_episodes_total': 114}
[2m[33m(raylet)[0m [2023-02-11 17:26:46,634 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061335040; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:26:56,640 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061343232; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:06,648 E 14732 14777] (raylet) f

Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_darm_DarmSFHand-v0_6a944_00000,13026,"{'num_env_steps_sampled': 13026, 'num_env_steps_trained': 258304, 'num_agent_steps_sampled': 13026, 'num_agent_steps_trained': 258304, 'last_target_update_ts': 13026, 'num_target_updates': 1009}",{},2023-02-11_17-28-35,False,100,{},-166.097,-179.588,-189.684,9,132,2674246d3b814ef583cb37ca785123d2,Daniel,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 8.40356159210205, 'actor_loss': -4.885239601135254, 'critic_loss': 0.3069121241569519, 'alpha_loss': -2.5390048027038574, 'alpha_value': 0.7392387, 'log_alpha_value': -0.30213442, 'target_entropy': -5.0, 'policy_t': -0.029988128691911697, 'mean_q': 2.379087448120117, 'max_q': 3.1470589637756348, 'min_q': 1.5433847904205322}, 'td_error': array([7.4213958e-01, 1.5848637e-01, 6.0251343e-01, 9.3348145e-01,  7.2470105e-01, 6.5075898e-01, 7.4386942e-01, 4.2802992e+00,  4.9475217e-01, 2.1274698e-01, 1.5443254e-01, 2.0181298e-01,  4.8542452e-01, 4.9696553e-01, 3.7915547e+00, 8.3584547e-02,  8.3843565e-01, 7.5096285e-01, 6.2452388e-01, 2.4125576e-01,  7.7261329e-01, 2.6608777e-01, 3.3530772e-01, 2.6860654e-01,  1.5399015e-01, 7.0978558e-01, 7.8079522e-01, 1.0731530e-01,  8.8066232e-01, 1.1126903e+00, 3.6070585e-02, 6.7874563e-01,  7.5406009e-01, 4.2981052e-01, 1.1391871e+00, 3.9740098e-01,  1.0762990e+00, 8.4136343e-01, 5.8252001e-01, 4.0861154e-01,  5.6281984e-01, 2.7024639e-01, 6.9000638e-01, 8.6244369e-01,  5.7595563e-01, 7.2603118e-01, 5.9470689e-01, 2.7473211e-01,  5.6826186e-01, 2.4650784e+02, 9.8598832e-01, 7.3479068e-01,  6.1449623e-01, 1.2699622e+00, 7.5296319e-01, 2.8090358e-02,  9.4109213e-01, 8.2771111e-01, 4.2838442e-01, 3.8090675e+00,  4.7546709e-01, 2.4742079e-01, 4.1203547e-01, 7.3801911e-01,  1.0025257e+00, 6.7763782e-01, 6.7099619e-01, 8.6762822e-01,  5.6190348e-01, 8.8954902e-01, 8.1222010e-01, 8.6386180e-01,  7.6953566e-01, 1.0633967e+00, 5.9996891e-01, 5.3750610e-01,  7.0670819e-01, 4.9724150e-01, 3.3370614e-02, 6.8903613e-01,  9.4764221e-01, 5.0915122e-02, 5.0027347e-01, 9.6055913e-01,  5.5192137e-01, 7.9515433e-01, 7.2671640e-01, 3.9931262e-01,  1.8239129e-01, 9.9649012e-01, 8.4206927e-01, 4.1600978e-01,  4.0527940e-01, 7.6102638e-01, 2.3393106e-01, 4.7766042e-01,  2.2459340e-01, 8.5827851e-01, 1.4306033e-01, 2.4650784e+02,  7.1198571e-01, 3.9922416e+00, 1.2246186e+00, 7.4194229e-01,  2.7496171e-01, 4.5212805e-02, 7.4664807e-01, 1.3847947e-02,  8.7445688e-01, 6.6402781e-01, 1.0255686e+00, 4.5125723e-01,  4.8755097e-01, 2.4650784e+02, 4.4124365e-01, 1.0487792e+00,  5.8346188e-01, 2.6959336e-01, 3.5287654e-01, 5.9907603e-01,  4.8603582e-01, 6.1551094e-01, 6.9831514e-01, 5.1433253e-01,  1.8200487e-01, 9.6122825e-01, 7.8497732e-01, 2.2768998e-01,  9.6964097e-01, 1.4972503e+00, 8.0229974e-01, 1.0484257e+00,  5.5421102e-01, 8.3084774e-01, 4.7661805e-01, 3.9173824e-01,  3.1396019e-01, 4.2802992e+00, 2.7052438e-01, 2.6957560e-01,  7.5368738e-01, 4.4456518e-01, 3.1527257e-01, 8.5121763e-01,  9.0664178e-01, 9.4629610e-01, 5.6297445e-01, 5.9285718e-01,  6.3104606e-01, 5.2718985e-01, 6.5370166e-01, 7.0399725e-01,  4.5417070e-02, 2.4650784e+02, 7.2803473e-01, 1.1245636e+00,  3.7708211e-01, 3.7433398e-01, 4.3422055e-01, 3.2808065e-01,  6.2305951e-01, 1.7103601e-01, 7.9449832e-01, 1.3040452e+00,  7.1471536e-01, 4.5487504e+00, 4.1272748e-01, 6.5745860e-01,  6.6768157e-01, 8.8028562e-01, 7.0535421e-01, 5.2402341e-01,  5.6226981e-01, 5.4202604e-01, 2.7826047e-01, 2.6031137e-01,  6.0549617e-02, 3.6561573e-01, 2.4650784e+02, 8.0606019e-01,  8.4074116e-01, 4.9388194e-01, 7.1800745e-01, 2.9282093e-02,  1.9090211e-01, 3.8544512e-01, 1.4638956e+00, 1.4547678e+00,  1.0922147e+00, 2.6176953e-01, 1.3020796e-01, 5.6222248e-01,  5.6339896e-01, 7.6045167e-01, 7.8438163e-01, 7.5755298e-01,  8.2661462e-01, 3.5743856e-01, 1.3571662e-01, 5.3244066e-01,  8.8719201e-01, 8.2828355e-01, 3.8229942e-01, 6.0678411e-01,  4.7898412e-01, 8.2518208e-01, 5.2971601e-01, 6.7987609e-01,  7.6182199e-01, 1.0264168e+00, 6.2066817e-01, 9.0486789e-01,  4.7908902e-01, 1.1681950e-01, 7.6850456e-01, 3.1422675e-01,  9.3148047e-01, 9.5507002e-01, 8.3421135e-01, 5.6414163e-01,  4.1598296e-01, 5.0719857e-02, 9.6793044e-01, 1.4145180e+00,  1.4200950e-01, 8.1434751e-01, 7.0387411e-01, 8.6176515e-01,  6.2346458e-01, 1.4636874e-01, 3.2455921e-01, 1.5807381e+00,  5.9650755e-01, 7.9351628e-01, 1.6089365e+00, 7.5115800e-01,  5.8976293e-01, 4.7450304e-02, 6.6682827e-01, 7.1542680e-01,  4.6520185e-01, 3.4638846e-01, 7.5957966e-01, 4.9341345e-01,  4.8143768e-01, 1.2025452e-01, 6.0646594e-01, 1.1619196e+00,  2.7393532e-01, 8.4904301e-01, 2.5427663e-01, 7.0259297e-01,  5.2577734e-01, 2.9342413e-01, 6.1365223e-01, 9.0736806e-01],  dtype=float32), 'mean_td_error': 5.492199897766113, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 256.0, 'num_grad_updates_lifetime': 668.0, 'diff_num_grad_updates_vs_sampler_policy': 667.0}}, 'num_env_steps_sampled': 13026, 'num_env_steps_trained': 258304, 'num_agent_steps_sampled': 13026, 'num_agent_steps_trained': 258304, 'last_target_update_ts': 13026, 'num_target_updates': 1009}",2,192.168.152.36,13026,258304,13026,1002,258304,85504,0,3,0,0,85504,"{'cpu_util_percent': 54.76744186046512, 'ram_util_percent': 85.32209302325585}",15703,{},{},{},"{'mean_raw_obs_processing_ms': 1.3155451329359085, 'mean_inference_ms': 2.6820931912181267, 'mean_action_processing_ms': 0.25946855188663404, 'mean_env_wait_ms': 3.287473482817159, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -166.09740307927132, 'episode_reward_min': -189.6840973868966, 'episode_reward_mean': -179.5880893824829, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 9, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-187.3498569726944, -166.09740307927132, -172.9712873697281, -187.82146245241165, -176.65354753285646, -183.53197374939919, -176.7706963941455, -189.6840973868966, -175.4124795049429], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.3155451329359085, 'mean_inference_ms': 2.6820931912181267, 'mean_action_processing_ms': 0.25946855188663404, 'mean_env_wait_ms': 3.287473482817159, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",114.767,62.318,227.817,"{'training_iteration_time_ms': 151.985, 'load_time_ms': 0.246, 'load_throughput': 1042265.409, 'learn_time_ms': 25.824, 'learn_throughput': 9913.287, 'synch_weights_time_ms': 6.049}",1676132915,0,13026,13,6a944_00000,9.03385


[34m[1mwandb[0m: Adding directory to artifact (/home/daniel/DARM/darm_mujoco/darm_training/results/Test_DARMSF_DELTA_TARGET/SAC_darm_DarmSFHand-v0_6a944_00000_0_2023-02-11_17-23-28/checkpoint_000012)... Done. 0.0s
[2m[33m(raylet)[0m [2023-02-11 17:27:36,665 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055997952; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:46,672 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055973376; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:56,678 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055977472; capacity: 31845081088. Object creation will fai

<ray.tune.result_grid.ResultGrid at 0x7fedc40a1d90>

In [43]:
# Use similar environment to what we trained on
env = gym.wrappers.TimeLimit(env=DARMSFEnv(render_mode="human", reaction_time=0.08, hand_name="hand1"), max_episode_steps=200)


obs = env.reset()

episode_reward = 0
done = False

res = []

while not done:
    env.render()
    action = algo.compute_single_action(obs)
    obs,rew, done, info = env.step(action)
    episode_reward += rew
    # if info["reward"]["reach_reward"] == 100:
    #     print("Done")
    #     obs = env.reset()
    
    res.append(f"Step Reward: {rew}, \n Action: {action} \n Info: {info} \n\n\n")
print(f"Step Reward: {rew}, \n Action: {action} \n Info: {info} \n\n\n")
episode_reward

Loaded XML file successfully


[2m[33m(raylet)[0m [2023-02-15 02:17:05,090 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 144625664; capacity: 31845081088. Object creation will fail if spilling is required.


Step Reward: -0.044669644004530815, 
 Action: [-0.3416028  -0.35292995  0.07321934 -0.8370828  -0.79603755] 
 Info: {'sim_time': 8.149999999999391, 'action': array([0.        , 0.        , 0.07321934, 0.        , 0.        ],
      dtype=float32), 'reward': {'reach': array([-0.02050434]), 'bonus': array([0.]), 'act_reg': -0.03660966828465462, 'penalty': array([-0.]), 'sparse': array([-0.02050434]), 'solved': array([False]), 'done': array([False]), 'dense': array([-0.04466964])}, 'TimeLimit.truncated': True} 





-3.821191225618935

In [44]:
env.close()
episode_reward

-3.821191225618935

[2m[33m(raylet)[0m [2023-02-15 02:17:15,111 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 144592896; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-15 02:17:25,135 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 144580608; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-15 02:17:35,157 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 144560128; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-15 02:17:45,177 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 144523264

In [23]:
[print(i) for i in res]

Step Reward: 3.984342810409328, 
 Action: [-0.8860119  -0.30007792  0.01995797 -0.8822051  -0.63987327] 
 Info: {'sim_time': 0.08000000000000006, 'action': array([0.        , 0.        , 0.01995797, 0.        , 0.        ],
      dtype=float32), 'reward': {'reach': array([-0.00732965]), 'bonus': array([1.]), 'act_reg': -0.00997898355126381, 'penalty': array([-0.]), 'sparse': array([-0.00732965]), 'solved': array([False]), 'done': array([False]), 'dense': array([3.98434281])}} 



Step Reward: 3.9829649989700893, 
 Action: [-0.8721748  -0.2886872   0.03908411 -0.87554014 -0.64917165] 
 Info: {'sim_time': 0.16000000000000011, 'action': array([0.        , 0.        , 0.03908411, 0.        , 0.        ],
      dtype=float32), 'reward': {'reach': array([-0.0075404]), 'bonus': array([1.]), 'act_reg': -0.019542057067155838, 'penalty': array([-0.]), 'sparse': array([-0.0075404]), 'solved': array([False]), 'done': array([False]), 'dense': array([3.982965])}} 



Step Reward: 3.9830823392285124,

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

[2m[33m(raylet)[0m [2023-02-15 02:11:34,464 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 145035264; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-15 02:11:44,483 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 145035264; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-15 02:11:54,501 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 145027072; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-15 02:12:04,519 E 27340 27387] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-15_02-08-02_051251_27228 is over 95% full, available space: 145022976

In [51]:
algo.stop()

KeyboardInterrupt: 

In [64]:
import wandb
run = wandb.init()

[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[33m(raylet)[0m [2023-02-14 03:12:20,557 E 13144 13195] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-14_02-59-27_105877_12500 is over 95% full, available space: 72884224; capacity: 31845081088. Object creation will fail if spilling is required.


In [65]:
artifact = run.use_artifact('danieladejumo/DARM/checkpoint_SF_rllib_es_vast_ai:v3', type='model')

In [66]:
artifact_dir = artifact.download(root="/home/daniel/DARM/darm_mujoco/darm_training/results/vast_ai_checkpoints")

[34m[1mwandb[0m:   4 of 4 files downloaded.  
[2m[33m(raylet)[0m [2023-02-14 03:12:30,571 E 13144 13195] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-14_02-59-27_105877_12500 is over 95% full, available space: 72368128; capacity: 31845081088. Object creation will fail if spilling is required.


In [67]:
!ls /home/daniel/DARM/darm_mujoco/darm_training/results/vast_ai_checkpoints

ls: cannot access '/home/daniel/DARM/darm_mujoco/darm_training/results/vast_ai_checkpoints': No such file or directory


[2m[33m(raylet)[0m [2023-02-14 03:12:40,589 E 13144 13195] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-14_02-59-27_105877_12500 is over 95% full, available space: 72364032; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-14 03:12:50,607 E 13144 13195] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-14_02-59-27_105877_12500 is over 95% full, available space: 72351744; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-14 03:13:00,615 E 13144 13195] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-14_02-59-27_105877_12500 is over 95% full, available space: 72355840; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-14 03:13:10,626 E 13144 13195] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-14_02-59-27_105877_12500 is over 95% full, available space: 72351744; ca