## Installation and Imports

In [1]:
!pwd

/home/daniel/DARM/darm_mujoco/darm_training


In [2]:
# Configure env variables

# TODO: change path
import os
os.environ["DARM_MUJOCO_PATH"] = "/home/daniel/DARM/darm_mujoco"
os.getenv('DARM_MUJOCO_PATH')

'/home/daniel/DARM/darm_mujoco'

In [3]:
# Check if GCC is installed
!gcc --version

gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [None]:
# Install GCC if absent
!sudo apt update
!sudo apt install build-essential -y

In [None]:
# # Setup Mujoco for gym - If needed
# !apt-get install -y \
#     libgl1-mesa-dev \
#     libgl1-mesa-glx \
#     libglew-dev \
#     libosmesa6-dev \
#     software-properties-common

# !apt-get install -y patchelf

# !pip install gym

# !pip install free-mujoco-py

# import mujoco_py
# import gym

In [3]:
!pip install ray[rllib] torch
!pip install wandb
!pip install tensorflow_probability



In [4]:
!python setup.py install

/home/daniel/DARM/darm_mujoco
running install
running bdist_egg
running egg_info
writing darm_gym_env.egg-info/PKG-INFO
writing dependency_links to darm_gym_env.egg-info/dependency_links.txt
writing requirements to darm_gym_env.egg-info/requires.txt
writing top-level names to darm_gym_env.egg-info/top_level.txt
reading manifest file 'darm_gym_env.egg-info/SOURCES.txt'
writing manifest file 'darm_gym_env.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build/bdist.linux-x86_64/egg
creating build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/darm_sf_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/__init__.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/multi_darm_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/darm_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_e

In [None]:
# Check if mujoco import is successful
import mujoco

In [None]:
# If mujoco import fails, update pandas and restart runtime
!pip install pandas -U

In [None]:
# # If GLFW is absent
# %%bash
# sudo apt-get install libglfw3 -y
# sudo apt-get install libglfw3-dev -y
# pip install --user glfw

In [4]:
import ray
from ray.rllib.algorithms.sac import SACConfig
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback

import gym
from darm_gym_env import DARMSFEnv

## Register Environment with RLlib

In [6]:
# env_creator = lambda env_config: gym.make("darm/DarmSFHand-v0", render_mode=None, hand_name="hand1") # DARMSFEnv(render_mode=None, reaction_time=0.08, hand_name="hand1") # 

def make_env(env_config):
    env = gym.wrappers.TimeLimit(env=DARMSFEnv(render_mode=None, reaction_time=0.08, hand_name="hand1"), max_episode_steps=200)
    return env
env_creator = lambda env_config: make_env(env_config) #gym.wrappers.TimeLimit(env=DARMSFEnv(render_mode=None, reaction_time=0.08, hand_name="hand1"), max_episode_steps=200)

register_env("darm/DarmSFHand-v0", env_creator)

## Configure and Run

In [7]:
# TODO:
# change: rollout_workers
# change: gpu

config = (
    SACConfig()
    .environment(
        env="darm/DarmSFHand-v0",
        normalize_actions=True
    )
    .training(
        q_model_config={
            "fcnet_activation": "relu",
            "fcnet_hiddens": [32, 256, 256, 64] # [256, 256]
        },
        policy_model_config={
            "fcnet_activation": "relu",
            "fcnet_hiddens": [32, 256, 256, 64] # [256, 256]
        },
        tau=0.005,
        target_entropy="auto",
        n_step=1,  # num of SGD steps per batch of data in training step
        train_batch_size=256,
        target_network_update_freq=1,
        replay_buffer_config={"type":"MultiAgentPrioritizedReplayBuffer"},
        num_steps_sampled_before_learning_starts=10_000,
        optimization_config={
          "actor_learning_rate": 0.0003,
          "critic_learning_rate": 0.0003,
          "entropy_learning_rate": 0.0003,
        },
        clip_actions=False
    )
    .rollouts(
        num_rollout_workers=1,
        num_envs_per_worker=4,
        rollout_fragment_length=1,
        recreate_failed_workers=True,
        num_consecutive_worker_failures_tolerance=10,
        restart_failed_sub_environments=True,
        # batch_mode="complete_episodes"
    )
    .resources(num_gpus=0)
    .evaluation(evaluation_interval=100) # For 1000 timesteps iter; 100 evals
    .reporting(
        min_sample_timesteps_per_iteration=1000,
        metrics_num_episodes_for_smoothing=5
    )
    .framework(framework="torch")
)

In [8]:
# TODO:
# change: rollout_workers
# change: gpu
# change: tags
# change: name

wandb_init = dict(
    save_code=True,
    resume=True,
    config={
        "env": "DARMSFHand-v0",
        
        "actor_learning_rate": 0.0003,
        "critic_learning_rate": 0.0003,
        "entropy_learning_rate": 0.0003,
        "framework": "torch",
        
        "num_rollout_workers": 1,
        "num_envs_per_worker": 4,
        "num_gpu": 0,
        "metrics_num_episodes_for_smoothing": 5
    },
    tags=["single_finger"],
    notes="Updated env dynamics and action range to [-1, 1]",
    name="SF_rllib_sac_local"
    # job_type=
    # monitor_gym=
)

In [9]:
%cd /home/daniel/DARM/darm-mujoco/darm_training/

[Errno 2] No such file or directory: '/home/daniel/DARM/darm-mujoco/darm_training/'
/home/daniel/DARM/darm_mujoco/darm_training


In [10]:
# TODO: 
# change: name
# change: checkpoint_freq

sync_config = tune.SyncConfig()

tuner = tune.Tuner(
    "SAC",
    run_config=air.RunConfig(
        name="SF_rllib_sac_local",
        local_dir=f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results",
        sync_config=sync_config,
        stop={"training_iteration": 10_000, "episode_reward_mean": 400},
        checkpoint_config=air.CheckpointConfig(
            checkpoint_at_end=True,
            checkpoint_score_attribute="episode_reward_mean",  # or leave to save last chkpts
            checkpoint_score_order="max",
            checkpoint_frequency=10,
            num_to_keep=3
        ),
        callbacks=[
                WandbLoggerCallback(project="DARM", 
                                    api_key="392c8a47eb0658eb5c71190757a69110e2140f4a",
                                    save_checkpoints=True, 
                                    **wandb_init)
            ],
        ),
    param_space=config
)

results = tuner.fit()

2023-02-21 12:10:47,688	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-02-21 12:20:33
Running for:,00:09:44.63
Memory:,6.3/7.5 GiB

Trial name,# failures,error file
SAC_darm_DarmSFHand-v0_65686_00000,1,/home/daniel/DARM/darm_mujoco/darm_training/results/SF_rllib_sac_local/SAC_darm_DarmSFHand-v0_65686_00000_0_2023-02-21_12-10-49/error.txt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_darm_DarmSFHand-v0_65686_00000,ERROR,127.0.1.1:3910,20,522.449,20000,-35.8496,99.181,-56.0222,44.8095


[2m[36m(SAC pid=3910)[0m 2023-02-21 12:10:53,471	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[2m[33m(raylet)[0m [2023-02-21 12:10:57,603 E 3637 3687] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-21_12-10-45_607475_3376 is over 95% full, available space: 123559936; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(RolloutWorker pid=4013)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=4013)[0m   logger.warn(
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


[2m[36m(RolloutWorker pid=4013)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=4013)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=4013)[0m Loaded XML file successfully
[2m[36m(SAC pid=3910)[0m Loaded XML file successfully


[2m[36m(SAC pid=3910)[0m   logger.warn(
[2m[36m(SAC pid=3910)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


[2m[36m(SAC pid=3910)[0m Loaded XML file successfully
[2m[36m(SAC pid=3910)[0m Loaded XML file successfully
[2m[36m(SAC pid=3910)[0m Loaded XML file successfully

[2m[36m(SAC pid=3910)[0m 2023-02-21 12:11:04,831	INFO trainable.py:172 -- Trainable.setup took 11.362 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.





[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[33m(raylet)[0m [2023-02-21 12:11:07,609 E 3637 3687] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-21_12-10-45_607475_3376 is over 95% full, available space: 123523072; capacity: 31845081088. Object creation will fail if spilling is required.


Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,experiment_tag,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_darm_DarmSFHand-v0_65686_00000,20000,"{'num_env_steps_sampled': 20000, 'num_env_steps_trained': 640000, 'num_agent_steps_sampled': 20000, 'num_agent_steps_trained': 640000, 'last_target_update_ts': 20000, 'num_target_updates': 2500}",{},2023-02-21_12-19-48,False,44.8095,{},99.181,-35.8496,-56.0222,21,379,7121a2897dbd402897ff3c3f1b9afd95,0,Daniel,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 8.34366226196289, 'actor_loss': -20.098920822143555, 'critic_loss': 0.9462350010871887, 'alpha_loss': -6.25947904586792, 'alpha_value': 0.4722685, 'log_alpha_value': -0.7502076, 'target_entropy': -5.0, 'policy_t': -0.03155910596251488, 'mean_q': 18.46891212463379, 'max_q': 23.077219009399414, 'min_q': 13.582803726196289}, 'td_error': array([2.8098392e-01, 2.9108019e+00, 6.4680290e+01, 6.5683914e+01,  4.9462223e-01, 6.4996147e+01, 4.7455978e-01, 8.9980488e+00,  2.3372936e-01, 3.5990210e+00, 6.5933655e+01, 5.4631615e-01,  2.0213696e+01, 8.7169428e+00, 6.4353813e+01, 3.0978298e-01,  8.5065079e-01, 3.9201126e+00, 2.3979187e-01, 3.9770317e-01,  6.4964417e+01, 4.2917156e-01, 6.7584038e-01, 1.6901302e-01,  1.7328930e-01, 3.2683659e-01, 1.8280910e+01, 1.0691929e-01,  5.5623436e-01, 8.5890102e-01, 3.4995747e-01, 2.2137928e-01,  4.1144753e-01, 5.4380894e-01, 1.0319042e+00, 1.7892584e+01,  8.4710789e-01, 6.5739151e+01, 3.5657778e+00, 2.1115828e-01,  3.0154896e-01, 8.8747663e+00, 7.0559025e-01, 1.3753867e+00,  1.8483109e+01, 6.5013733e+01, 3.4921818e+00, 4.1988583e+00,  6.4985550e+01, 3.9344263e+00, 1.9768047e-01, 5.4177189e-01,  7.5754166e-02, 3.9272079e+00, 6.2749386e-01, 5.3561497e-01,  4.5365620e-01, 6.4077080e+01, 9.5634270e-01, 6.4658401e+01,  6.5264069e+01, 2.2497082e-01, 4.0964355e+00, 6.4210678e+01,  4.4881582e-01, 5.0739288e-01, 6.5402237e+01, 3.3568029e+00,  6.5342514e+01, 9.7294474e-01, 1.6293812e-01, 6.4963646e+01,  3.4381866e-01, 7.0247364e-01, 1.3971710e-01, 2.6442623e-01,  6.3988781e-01, 1.6987778e+01, 1.0796986e+00, 2.0207787e-01,  5.9205914e-01, 1.6658688e-01, 2.0581678e+01, 8.5054827e+00,  6.4431229e+01, 1.9734287e-01, 5.0262165e-01, 6.5098068e+01,  7.0434475e-01, 4.6113110e-01, 6.5129707e+01, 3.5407820e+00,  3.6175127e+00, 4.3661499e-01, 1.6813469e-01, 4.6052294e+00,  1.5591760e+00, 6.4702805e+01, 1.7656040e-01, 6.3683453e+01,  6.6634499e+01, 3.5439539e+00, 2.2020817e-01, 4.5546818e-01,  6.3891548e+01, 5.6625080e-01, 2.2960758e-01, 3.1537914e-01,  8.4593391e+00, 2.9612446e-01, 3.6262035e-01, 6.4544891e+01,  6.9693756e-01, 1.3388252e-01, 4.1897058e+00, 2.6604080e-01,  3.4599428e+00, 1.9654564e+01, 1.3248158e-01, 5.3291321e-02,  6.4649628e+01, 2.9576969e-01, 3.6828136e-01, 8.4107666e+00,  2.1689106e+01, 2.4573469e+00, 1.1618614e-01, 6.5425789e+01,  5.3049946e-01, 4.6811199e-01, 3.1318188e-01, 3.0209150e+00,  6.4306305e+01, 1.0853815e+00, 8.6286154e+00, 6.4555641e+01,  6.4912514e+01, 8.2594080e+00, 1.6148663e-01, 6.5494293e+01,  6.5299576e+01, 6.5377930e+01, 1.1393356e+00, 4.2160072e+00,  6.4985550e+01, 4.1149616e-01, 6.4388199e+01, 5.5129623e-01,  3.0411434e-01, 9.1480761e+00, 6.5054077e+01, 3.3440971e+00,  1.7685223e-01, 5.0328922e-01, 5.3289032e-01, 7.9227257e-01,  4.4860363e-01, 5.1988792e-01, 2.9105091e-01, 6.4431229e+01,  1.5519619e-01, 1.7117409e+01, 7.7591991e-01, 4.5020294e-01,  6.4702805e+01, 5.5268860e-01, 4.5212650e-01, 6.5267822e+01,  1.1635942e+00, 1.9039917e-01, 3.5198689e-01, 3.3176804e-01,  2.6548862e-01, 6.2735748e-01, 2.1870346e+01, 1.8226528e-01,  4.4756889e-01, 9.1760921e-01, 2.0930004e-01, 3.7551203e+00,  3.9902239e+00, 8.3127880e-01, 6.5113281e+01, 2.1689106e+01,  4.6939278e-01, 2.5425053e-01, 6.5533249e+01, 2.6024151e-01,  8.0341053e-01, 1.6423702e-01, 6.7962360e-01, 6.5141602e+01,  1.4282179e-01, 4.5076084e-01, 3.9436626e-01, 2.1761990e-01,  6.4218597e+01, 3.7626934e-01, 7.0015907e-02, 3.6425781e-01,  3.8261986e-01, 9.2189789e-01, 6.4210678e+01, 6.3952080e+01,  3.9793873e-01, 3.9280891e-02, 1.7706688e+01, 7.2814131e-01,  6.4596405e+01, 6.2583017e-01, 6.5148483e+01, 3.1291485e-01,  3.5307884e-02, 9.2028332e-01, 6.4682236e+01, 8.2102451e+00,  6.9448090e-01, 3.5887241e-01, 2.5736141e-01, 2.1440411e-01,  3.5326014e+00, 6.5124863e+01, 3.6624708e+00, 2.1570604e+01,  3.3911085e+00, 6.5208435e+01, 5.0544071e-01, 7.2669888e-01,  6.4658401e+01, 3.6429300e+00, 3.5400019e+00, 4.7265596e+00,  4.2932987e-01, 2.2146255e+01, 3.6733685e+00, 3.8660727e+00,  7.0855284e-01, 3.0791283e-01, 1.3463211e-01, 2.2605389e+01,  4.5795155e-01, 6.5845726e+01, 3.0511427e+00, 6.4388199e+01,  1.4362335e-01, 6.4706131e+01, 7.8983879e-01, 7.7039146e-01,  2.1313953e-01, 6.4233498e+01, 6.5050461e+01, 6.4947083e+01,  4.6719742e-01, 4.5491505e-01, 3.9731216e-01, 5.9753036e-01],  dtype=float32), 'mean_td_error': 16.39834213256836, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 256.0, 'num_grad_updates_lifetime': 2500.0, 'diff_num_grad_updates_vs_sampler_policy': 2499.0}}, 'num_env_steps_sampled': 20000, 'num_env_steps_trained': 640000, 'num_agent_steps_sampled': 20000, 'num_agent_steps_trained': 640000, 'last_target_update_ts': 20000, 'num_target_updates': 2500}",20,127.0.1.1,20000,640000,20000,1000,640000,64000,0,1,0,0,64000,"{'cpu_util_percent': 57.05555555555555, 'ram_util_percent': 85.45238095238093}",3910,{},{},{},"{'mean_raw_obs_processing_ms': 3.5537882486216383, 'mean_inference_ms': 2.0042037372707346, 'mean_action_processing_ms': 0.3657521688563899, 'mean_env_wait_ms': 7.3906303143363035, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 99.18102581763432, 'episode_reward_min': -56.02219545627752, 'episode_reward_mean': -35.84964899002238, 'episode_len_mean': 44.80952380952381, 'episode_media': {}, 'episodes_this_iter': 21, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-53.61248563565584, -44.93351024772356, -51.41114960331489, -21.803290157099887, -9.913875075992781, -44.12577516419403, -56.02219545627752, 99.18102581763432, -3.4581515991408907, -51.60623302078359, -53.530450991781265, -53.33266513885484, -18.692983891955116, -46.27705274558223, -52.83254007623569, -45.94985136837421, -24.92246094601852, -54.8438110574575, -55.893072865224305, -54.721958985637634, -54.14014058080002], 'episode_lengths': [23, 19, 11, 100, 100, 18, 27, 100, 100, 12, 21, 19, 100, 24, 18, 28, 100, 31, 36, 27, 27]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 3.5537882486216383, 'mean_inference_ms': 2.0042037372707346, 'mean_action_processing_ms': 0.3657521688563899, 'mean_env_wait_ms': 7.3906303143363035, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",522.449,45.8812,522.449,"{'training_iteration_time_ms': 184.183, 'load_time_ms': 0.301, 'load_throughput': 850690.718, 'learn_time_ms': 35.59, 'learn_throughput': 7193.103, 'synch_weights_time_ms': 2.531}",1676978388,0,20000,20,65686_00000,11.3663


[2m[33m(raylet)[0m [2023-02-21 12:11:17,615 E 3637 3687] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-21_12-10-45_607475_3376 is over 95% full, available space: 123383808; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-21 12:11:27,622 E 3637 3687] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-21_12-10-45_607475_3376 is over 95% full, available space: 123297792; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-21 12:11:37,629 E 3637 3687] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-21_12-10-45_607475_3376 is over 95% full, available space: 123248640; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36

Problem at: /home/daniel/miniconda3/lib/python3.8/site-packages/ray/air/integrations/wandb.py 309 run


Traceback (most recent call last):
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/wandb/sdk/wandb_init.py", line 1133, in init
    run = wi.init()
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/wandb/sdk/wandb_init.py", line 759, in init
    raise error
wandb.errors.CommError: Error communicating with wandb process, exiting...
For more info see: https://docs.wandb.ai/library/init#init-start-error
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=4013)[0m   return ufunc.reduce(obj, axis, dtype, out, **pass

[2m[36m(RolloutWorker pid=7581)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=7581)[0m Loaded XML file successfully


[2m[33m(raylet)[0m [2023-02-21 12:20:18,514 E 3637 3687] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-21_12-10-45_607475_3376 is over 95% full, available space: 114556928; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-21 12:20:28,524 E 3637 3687] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-21_12-10-45_607475_3376 is over 95% full, available space: 114499584; capacity: 31845081088. Object creation will fail if spilling is required.
2023-02-21 12:20:33,636	ERROR trial_runner.py:1088 -- Trial SAC_darm_DarmSFHand-v0_65686_00000: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/tune/execution/ray_trial_executor.py", line 1070, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/_private/client_mode

In [16]:
# Ensure wandb is sysncing to cloud
# cd to darm_training again if not

In [10]:
# TODO:
# change: experiment name

# Restore Interrupted run
tuner = tune.Tuner.restore(
    f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results/Test_DARMSF_DELTA_TARGET",
    resume_errored=True
)
tuner

2023-02-11 17:26:13,607	INFO experiment_analysis.py:795 -- No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.


<ray.tune.tuner.Tuner at 0x7fede99d9a90>

[2m[33m(raylet)[0m [2023-02-11 17:26:16,601 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061720064; capacity: 31845081088. Object creation will fail if spilling is required.


In [15]:
results = tuner.get_results()
results

<ray.tune.result_grid.ResultGrid at 0x7fcd58384b80>

In [16]:
# Get the best result based on a particular metric.
best_result = results.get_best_result(metric="episode_reward_mean", mode="max")
best_result

Result(metrics={'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 8.343955039978027, 'actor_loss': -5.076763153076172, 'critic_loss': 0.4612053632736206, 'alpha_loss': -0.850807249546051, 'alpha_value': 0.9030595, 'log_alpha_value': -0.101966895, 'target_entropy': -5.0, 'policy_t': -0.01997794397175312, 'mean_q': 2.0334110260009766, 'max_q': 2.8355112075805664, 'min_q': 1.038293480873108}, 'td_error': array([9.46030378e-01, 4.29627061e-01, 2.65497327e-01, 8.43869328e-01,
       1.09686172e+00, 7.66791701e-01, 7.26696014e-01, 5.70532084e-02,
       1.14584994e+00, 4.43507016e-01, 1.04901314e-01, 1.52089047e+00,
       6.52013183e-01, 8.16148460e-01, 1.08409297e+00, 2.61833251e-01,
       3.79876256e-01, 9.56449747e-01, 5.75677335e-01, 1.02149987e+00,
       1.76170349e-01, 9.50863540e-01, 7.04805613e-01, 3.25276971e-01,
       6.36387825e-01, 8.46629441e-01, 5.59558868e-02, 8.83865356e-03,
       

In [17]:
# Get the best checkpoint corresponding to the best result.
best_checkpoint = best_result.checkpoint
best_checkpoint

Checkpoint(local_path=/home/daniel/DARM/darm_mujoco/darm_training/results/Test_DARMSF_DELTA_TARGET/SAC_darm_DarmSFHand-v0_a1dbe_00000_0_2023-02-11_16-49-13/results/Test_DARMSF_DELTA_TARGET/SAC_darm_DarmSFHand-v0_ad8de_00000_0_2023-02-11_16-56-43/checkpoint_000011)

[2m[33m(raylet)[0m [2023-02-11 17:00:32,579 E 9254 9299] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_16-49-09_974082_9002 is over 95% full, available space: 1129005056; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:00:42,596 E 9254 9299] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_16-49-09_974082_9002 is over 95% full, available space: 1129254912; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:00:52,615 E 9254 9299] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_16-49-09_974082_9002 is over 95% full, available space: 1129238528; capacity: 31845081088. Object creation will fail if spilling is required.


In [11]:
# Get Algorithm from saved checkpoint
from ray.rllib.algorithms.algorithm import Algorithm
algo = Algorithm.from_checkpoint(best_checkpoint._local_path)
algo

2023-02-11 16:38:32,949	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m [2023-02-11 16:38:35,741 E 6815 6860] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_16-34-33_438228_6699 is over 95% full, available space: 1174347776; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(RolloutWorker pid=7501)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=7501)[0m   logger.warn(
[2m[36m(RolloutWorker pid=7500)[0m   logger.warn(


[2m[36m(RolloutWorker pid=7500)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=7502)[0m   logger.warn(


[2m[36m(RolloutWorker pid=7502)[0m Loaded XML file successfully


  logger.warn(


Loaded XML file successfully


SAC

In [11]:
# resume the interrupted run
tuner.fit()

2023-02-11 17:26:26,421	INFO trial_runner.py:688 -- A local experiment checkpoint was found and will be used to restore the previous experiment state.
2023-02-11 17:26:26,422	INFO trial_runner.py:825 -- Using following checkpoint to resume: /home/daniel/DARM/darm_mujoco/darm_training/results/Test_DARMSF_DELTA_TARGET/experiment_state-2023-02-11_17-23-28.json
2023-02-11 17:26:26,440	INFO tune.py:653 -- TrialRunner resumed, ignoring new add_experiment but updating trial resources.


0,1
Current time:,2023-02-11 17:29:20
Running for:,00:02:54.01
Memory:,6.3/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_darm_DarmSFHand-v0_6a944_00000,RUNNING,192.168.152.36:15703,13,227.817,13026,-179.588,-166.097,-189.684,100


[2m[33m(raylet)[0m [2023-02-11 17:26:26,618 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061683200; capacity: 31845081088. Object creation will fail if spilling is required.
[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:31,413	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[33m(raylet)[0m [2023-02-11 17:26:36,627 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061359616; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(RolloutWorker pid=15846)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=15844)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=15846)[0m   logger.warn(
[2m[36m(RolloutWorker pid=15844)[0m   logger.warn(


[2m[36m(RolloutWorker pid=15845)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=15845)[0m   logger.warn(
[2m[36m(SAC pid=15703)[0m   logger.warn(


[2m[36m(SAC pid=15703)[0m Loaded XML file successfully


[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:40,442	INFO trainable.py:790 -- Restored on 192.168.152.36 from checkpoint: /tmp/checkpoint_tmp_7f50b6e15e2c473dba807bf1d398566d
[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:40,442	INFO trainable.py:799 -- Current state after restoring: {'_iteration': 11, '_timesteps_total': None, '_time_total': 113.04964661598206, '_episodes_total': 114}
[2m[33m(raylet)[0m [2023-02-11 17:26:46,634 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061335040; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:26:56,640 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061343232; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:06,648 E 14732 14777] (raylet) f

Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_darm_DarmSFHand-v0_6a944_00000,13026,"{'num_env_steps_sampled': 13026, 'num_env_steps_trained': 258304, 'num_agent_steps_sampled': 13026, 'num_agent_steps_trained': 258304, 'last_target_update_ts': 13026, 'num_target_updates': 1009}",{},2023-02-11_17-28-35,False,100,{},-166.097,-179.588,-189.684,9,132,2674246d3b814ef583cb37ca785123d2,Daniel,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 8.40356159210205, 'actor_loss': -4.885239601135254, 'critic_loss': 0.3069121241569519, 'alpha_loss': -2.5390048027038574, 'alpha_value': 0.7392387, 'log_alpha_value': -0.30213442, 'target_entropy': -5.0, 'policy_t': -0.029988128691911697, 'mean_q': 2.379087448120117, 'max_q': 3.1470589637756348, 'min_q': 1.5433847904205322}, 'td_error': array([7.4213958e-01, 1.5848637e-01, 6.0251343e-01, 9.3348145e-01,  7.2470105e-01, 6.5075898e-01, 7.4386942e-01, 4.2802992e+00,  4.9475217e-01, 2.1274698e-01, 1.5443254e-01, 2.0181298e-01,  4.8542452e-01, 4.9696553e-01, 3.7915547e+00, 8.3584547e-02,  8.3843565e-01, 7.5096285e-01, 6.2452388e-01, 2.4125576e-01,  7.7261329e-01, 2.6608777e-01, 3.3530772e-01, 2.6860654e-01,  1.5399015e-01, 7.0978558e-01, 7.8079522e-01, 1.0731530e-01,  8.8066232e-01, 1.1126903e+00, 3.6070585e-02, 6.7874563e-01,  7.5406009e-01, 4.2981052e-01, 1.1391871e+00, 3.9740098e-01,  1.0762990e+00, 8.4136343e-01, 5.8252001e-01, 4.0861154e-01,  5.6281984e-01, 2.7024639e-01, 6.9000638e-01, 8.6244369e-01,  5.7595563e-01, 7.2603118e-01, 5.9470689e-01, 2.7473211e-01,  5.6826186e-01, 2.4650784e+02, 9.8598832e-01, 7.3479068e-01,  6.1449623e-01, 1.2699622e+00, 7.5296319e-01, 2.8090358e-02,  9.4109213e-01, 8.2771111e-01, 4.2838442e-01, 3.8090675e+00,  4.7546709e-01, 2.4742079e-01, 4.1203547e-01, 7.3801911e-01,  1.0025257e+00, 6.7763782e-01, 6.7099619e-01, 8.6762822e-01,  5.6190348e-01, 8.8954902e-01, 8.1222010e-01, 8.6386180e-01,  7.6953566e-01, 1.0633967e+00, 5.9996891e-01, 5.3750610e-01,  7.0670819e-01, 4.9724150e-01, 3.3370614e-02, 6.8903613e-01,  9.4764221e-01, 5.0915122e-02, 5.0027347e-01, 9.6055913e-01,  5.5192137e-01, 7.9515433e-01, 7.2671640e-01, 3.9931262e-01,  1.8239129e-01, 9.9649012e-01, 8.4206927e-01, 4.1600978e-01,  4.0527940e-01, 7.6102638e-01, 2.3393106e-01, 4.7766042e-01,  2.2459340e-01, 8.5827851e-01, 1.4306033e-01, 2.4650784e+02,  7.1198571e-01, 3.9922416e+00, 1.2246186e+00, 7.4194229e-01,  2.7496171e-01, 4.5212805e-02, 7.4664807e-01, 1.3847947e-02,  8.7445688e-01, 6.6402781e-01, 1.0255686e+00, 4.5125723e-01,  4.8755097e-01, 2.4650784e+02, 4.4124365e-01, 1.0487792e+00,  5.8346188e-01, 2.6959336e-01, 3.5287654e-01, 5.9907603e-01,  4.8603582e-01, 6.1551094e-01, 6.9831514e-01, 5.1433253e-01,  1.8200487e-01, 9.6122825e-01, 7.8497732e-01, 2.2768998e-01,  9.6964097e-01, 1.4972503e+00, 8.0229974e-01, 1.0484257e+00,  5.5421102e-01, 8.3084774e-01, 4.7661805e-01, 3.9173824e-01,  3.1396019e-01, 4.2802992e+00, 2.7052438e-01, 2.6957560e-01,  7.5368738e-01, 4.4456518e-01, 3.1527257e-01, 8.5121763e-01,  9.0664178e-01, 9.4629610e-01, 5.6297445e-01, 5.9285718e-01,  6.3104606e-01, 5.2718985e-01, 6.5370166e-01, 7.0399725e-01,  4.5417070e-02, 2.4650784e+02, 7.2803473e-01, 1.1245636e+00,  3.7708211e-01, 3.7433398e-01, 4.3422055e-01, 3.2808065e-01,  6.2305951e-01, 1.7103601e-01, 7.9449832e-01, 1.3040452e+00,  7.1471536e-01, 4.5487504e+00, 4.1272748e-01, 6.5745860e-01,  6.6768157e-01, 8.8028562e-01, 7.0535421e-01, 5.2402341e-01,  5.6226981e-01, 5.4202604e-01, 2.7826047e-01, 2.6031137e-01,  6.0549617e-02, 3.6561573e-01, 2.4650784e+02, 8.0606019e-01,  8.4074116e-01, 4.9388194e-01, 7.1800745e-01, 2.9282093e-02,  1.9090211e-01, 3.8544512e-01, 1.4638956e+00, 1.4547678e+00,  1.0922147e+00, 2.6176953e-01, 1.3020796e-01, 5.6222248e-01,  5.6339896e-01, 7.6045167e-01, 7.8438163e-01, 7.5755298e-01,  8.2661462e-01, 3.5743856e-01, 1.3571662e-01, 5.3244066e-01,  8.8719201e-01, 8.2828355e-01, 3.8229942e-01, 6.0678411e-01,  4.7898412e-01, 8.2518208e-01, 5.2971601e-01, 6.7987609e-01,  7.6182199e-01, 1.0264168e+00, 6.2066817e-01, 9.0486789e-01,  4.7908902e-01, 1.1681950e-01, 7.6850456e-01, 3.1422675e-01,  9.3148047e-01, 9.5507002e-01, 8.3421135e-01, 5.6414163e-01,  4.1598296e-01, 5.0719857e-02, 9.6793044e-01, 1.4145180e+00,  1.4200950e-01, 8.1434751e-01, 7.0387411e-01, 8.6176515e-01,  6.2346458e-01, 1.4636874e-01, 3.2455921e-01, 1.5807381e+00,  5.9650755e-01, 7.9351628e-01, 1.6089365e+00, 7.5115800e-01,  5.8976293e-01, 4.7450304e-02, 6.6682827e-01, 7.1542680e-01,  4.6520185e-01, 3.4638846e-01, 7.5957966e-01, 4.9341345e-01,  4.8143768e-01, 1.2025452e-01, 6.0646594e-01, 1.1619196e+00,  2.7393532e-01, 8.4904301e-01, 2.5427663e-01, 7.0259297e-01,  5.2577734e-01, 2.9342413e-01, 6.1365223e-01, 9.0736806e-01],  dtype=float32), 'mean_td_error': 5.492199897766113, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 256.0, 'num_grad_updates_lifetime': 668.0, 'diff_num_grad_updates_vs_sampler_policy': 667.0}}, 'num_env_steps_sampled': 13026, 'num_env_steps_trained': 258304, 'num_agent_steps_sampled': 13026, 'num_agent_steps_trained': 258304, 'last_target_update_ts': 13026, 'num_target_updates': 1009}",2,192.168.152.36,13026,258304,13026,1002,258304,85504,0,3,0,0,85504,"{'cpu_util_percent': 54.76744186046512, 'ram_util_percent': 85.32209302325585}",15703,{},{},{},"{'mean_raw_obs_processing_ms': 1.3155451329359085, 'mean_inference_ms': 2.6820931912181267, 'mean_action_processing_ms': 0.25946855188663404, 'mean_env_wait_ms': 3.287473482817159, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -166.09740307927132, 'episode_reward_min': -189.6840973868966, 'episode_reward_mean': -179.5880893824829, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 9, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-187.3498569726944, -166.09740307927132, -172.9712873697281, -187.82146245241165, -176.65354753285646, -183.53197374939919, -176.7706963941455, -189.6840973868966, -175.4124795049429], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.3155451329359085, 'mean_inference_ms': 2.6820931912181267, 'mean_action_processing_ms': 0.25946855188663404, 'mean_env_wait_ms': 3.287473482817159, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",114.767,62.318,227.817,"{'training_iteration_time_ms': 151.985, 'load_time_ms': 0.246, 'load_throughput': 1042265.409, 'learn_time_ms': 25.824, 'learn_throughput': 9913.287, 'synch_weights_time_ms': 6.049}",1676132915,0,13026,13,6a944_00000,9.03385


[34m[1mwandb[0m: Adding directory to artifact (/home/daniel/DARM/darm_mujoco/darm_training/results/Test_DARMSF_DELTA_TARGET/SAC_darm_DarmSFHand-v0_6a944_00000_0_2023-02-11_17-23-28/checkpoint_000012)... Done. 0.0s
[2m[33m(raylet)[0m [2023-02-11 17:27:36,665 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055997952; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:46,672 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055973376; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:56,678 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055977472; capacity: 31845081088. Object creation will fai

<ray.tune.result_grid.ResultGrid at 0x7fedc40a1d90>