## Installation and Imports

In [1]:
!pwd

/home/daniel/DARM/darm_mujoco/darm_training


In [2]:
# Configure env variables

# TODO: change path
import os

# Export the location of the darm_mujoco checkout for this session, then echo
# the stored value back as the cell output so the active path is visible.
os.environ.update(DARM_MUJOCO_PATH="/workspace/darm-mujoco")
os.environ.get('DARM_MUJOCO_PATH')

'/home/daniel/DARM/darm_mujoco'

In [3]:
# Check if GCC is installed
!gcc --version

gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [None]:
# Install GCC if absent
!sudo apt update
!sudo apt install build-essential -y

In [None]:
# # Setup Mujoco for gym - If needed
# !apt-get install -y \
#     libgl1-mesa-dev \
#     libgl1-mesa-glx \
#     libglew-dev \
#     libosmesa6-dev \
#     software-properties-common

# !apt-get install -y patchelf

# !pip install gym

# !pip install free-mujoco-py

# import mujoco_py
# import gym

In [3]:
!pip install ray[rllib] torch
!pip install wandb
!pip install tensorflow_probability



In [4]:
%%bash
cd ..
python setup.py install

/home/daniel/DARM/darm_mujoco
running install
running bdist_egg
running egg_info
writing darm_gym_env.egg-info/PKG-INFO
writing dependency_links to darm_gym_env.egg-info/dependency_links.txt
writing requirements to darm_gym_env.egg-info/requires.txt
writing top-level names to darm_gym_env.egg-info/top_level.txt
reading manifest file 'darm_gym_env.egg-info/SOURCES.txt'
writing manifest file 'darm_gym_env.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build/bdist.linux-x86_64/egg
creating build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/darm_sf_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/__init__.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/multi_darm_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/darm_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_e

In [None]:
# Check if mujoco import is successful
import mujoco

In [None]:
# If mujoco import fails, update pandas and restart runtime
!pip install pandas -U

In [None]:
# # If GLFW is absent
# %%bash
# sudo apt-get install libglfw3 -y
# sudo apt-get install libglfw3-dev -y
# pip install --user glfw

In [1]:
import ray
from ray.rllib.algorithms.sac import SACConfig
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback

import gym
from darm_gym_env import DARMEnv

### TODO

- Change `single_finger_env=False` (in the environment registration and in the config)
- Change `run_local_dir`

## Register Environment with RLlib

In [12]:
def make_env(env_config):
    """Build the wrapped DARM hand environment for RLlib.

    A ``DARMEnv`` is created, wrapped in ``gym.wrappers.TimeLimit`` and then in
    ``gym.wrappers.TransformObservation``, which multiplies every observation
    by a constant factor.

    Args:
        env_config: Optional mapping of overrides. Recognised keys and their
            defaults are ``render_mode`` (None), ``action_time`` (0.08),
            ``hand_name`` ("hand1"), ``single_finger_env`` (False),
            ``max_episode_steps`` (200) and ``obs_scale`` (100). ``None`` or
            an empty mapping reproduces the previously hard-coded setup.

    Returns:
        The wrapped environment instance.
    """
    # Copy so the caller's mapping is never mutated; tolerate None.
    settings = dict(env_config or {})

    base_env = DARMEnv(
        render_mode=settings.get("render_mode", None),
        action_time=settings.get("action_time", 0.08),
        hand_name=settings.get("hand_name", "hand1"),
        single_finger_env=settings.get("single_finger_env", False),
    )
    env = gym.wrappers.TimeLimit(
        env=base_env,
        max_episode_steps=settings.get("max_episode_steps", 200),
    )

    # The run notes describe this wrapper as scaling observations from m and
    # m/s to cm and cm/s, hence the default factor of 100.
    obs_scale = settings.get("obs_scale", 100)
    env = gym.wrappers.TransformObservation(env, lambda obs: obs * obs_scale)
    return env

# make_env already has the (env_config) -> env signature that register_env
# expects, so no lambda wrapper is needed. The env_creator name is kept for
# any later cell that refers to it.
env_creator = make_env
register_env("darm/DarmHand-v0", env_creator)

## Configure Run

In [14]:
# TODO:
# change: rollout_workers
# change: gpu

# SAC algorithm configuration for the environment registered above as
# "darm/DarmHand-v0". The resulting object is passed to tune.Tuner as
# `param_space` and several of its attributes are mirrored into the W&B config.
config = (
    SACConfig()
    .environment(
        env="darm/DarmHand-v0",
        normalize_actions=True
    )
    .training(
        # Q-network and policy network use the same layer layout.
        q_model_config={
            "fcnet_activation": "relu",
            "fcnet_hiddens": [128, 256, 256, 128] # [256, 256]
        },
        policy_model_config={
            "fcnet_activation": "relu",
            "fcnet_hiddens": [128, 256, 256, 128] # [256, 256]
        },
        tau=0.005,
        target_entropy="auto",
        n_step=1,  # length of the n-step return used for the Q-target (1 = one-step TD); not a count of SGD steps
        train_batch_size=256,
        target_network_update_freq=1,
        replay_buffer_config={"type":"MultiAgentPrioritizedReplayBuffer"},
        num_steps_sampled_before_learning_starts=40_000,  # timesteps (transitions)
        optimization_config={
          "actor_learning_rate": 0.0003,
          "critic_learning_rate": 0.0003,
          "entropy_learning_rate": 0.0003,
        },
        clip_actions=False
    )
    .rollouts(
        num_rollout_workers=3,
        num_envs_per_worker=1,
        rollout_fragment_length=1,
        # Fault-tolerance settings for workers and sub-environments.
        recreate_failed_workers=True,
        num_consecutive_worker_failures_tolerance=10,
        restart_failed_sub_environments=True,
        # batch_mode="complete_episodes"
    )
    .resources(num_gpus=0)
    .evaluation(evaluation_interval=100) # Training iterations
    .reporting(
        # The captured training output shows roughly 1000 env steps sampled per iteration.
        min_sample_timesteps_per_iteration=1000,
        metrics_num_episodes_for_smoothing=100
    )
    .framework(framework="torch")
)

In [15]:
# TODO:
# change: run name
# change: notes
# change: tags
# change: wandb config

# Run identifier: used as the W&B run name, as the air.RunConfig name, and as
# the experiment sub-directory when an interrupted run is restored.
run_name = "test1_MF_RLlib_SAC"

# Free-text changelog for this run; forwarded unchanged as the W&B `notes` field.
notes = """
- The environment was updated such that the target is within a range from the start point
- Velocity penalty was removed and only effort penalty was used
- The reward function was updated according to the reach task reward used in facebookresearch/myosuite [https://github.com/facebookresearch/myosuite/blob/main/myosuite/envs/myo/reach_v0.py]
- The done signal is trigerred only when the fingertip goes beyond a threshold. The episode continues to the maximum timestep otherwise.
- The friction and damping coefficient of the environment is updated. Values are inspired from Deepmind's Mujoco Menagerie [https://github.com/deepmind/mujoco_menagerie/blob/main/shadow_hand/right_hand.xml]
- The range of action from the model was changed to [-1, 1]. This action is mapped to the actual action sent to mujoco e.g [0, 2]]. This change is inspired from values used in OpenAI's Gym Mujoco environments.
- max_episode_steps was updated to 200.
- Velocity vector (size [3,]) was added to observation. Observation size is now (9,)
- Action range was increased to [0, 5]
- Observation warpper to scale observation from m and m/s to cm and cm/s was applied
- Max Tension for Digitorum Extensor Communis was increased to 10
- FIXED: Velocity Observation from (prev_pos - new_pos)/time to (new_pos - prev_pos)/time
- FIXED: Removed weight of 1 from 'sparse', 'solved', and 'done' in reward weighting
- Reduced max_target_th to 5*0.004, 20 mm

- Five-Fingers; No Wrist Environment
- This run was trained on vast_ai using RLlib's SAC algo.
"""

# Labels forwarded as the W&B `tags` field.
tags = ["five_fingers", "sac", "rllib", "vast_ai"]



# Keyword arguments that are later unpacked into WandbLoggerCallback.
# The nested "config" entry is also the single place where the stop criterion,
# output directory and checkpoint policy of the run are kept.
wandb_init = {
    "save_code": True,
    "resume": True,
    "config": {
        # Values mirrored from the RLlib algorithm config
        "env": config.env,
        "num_rollout_workers": config.num_rollout_workers,
        "num_envs_per_worker": config.num_envs_per_worker,
        "recreate_failed_workers": config.recreate_failed_workers,
        "num_consecutive_worker_failures_tolerance": config.num_consecutive_worker_failures_tolerance,
        "restart_failed_sub_environments": config.restart_failed_sub_environments,
        "num_gpus": config.num_gpus,
        "framework": config.framework_str,

        # Stop criterion and results directory
        "stop_episode_reward_mean": 1_300,
        "run_local_dir": f"{os.getenv('DARM_MUJOCO_PATH')}/darm_training/results/darm_mf_hand",

        # Checkpoint policy
        "checkpoint_at_end": True,
        "checkpoint_score_attribute": "episode_reward_mean",  # or leave to save last chkpts
        "checkpoint_score_order": "max",
        "checkpoint_frequency": 50,   # iterations
        "num_checkpoints_to_keep": 3,
        "save_checkpoints_to_wandb": True,
    },
    "tags": tags,
    "notes": notes,
    "name": run_name,
    # job_type=
    # monitor_gym=
}

In [16]:
!pwd

[Errno 2] No such file or directory: '/home/daniel/DARM/darm-mujoco/darm_training/'
/home/daniel/DARM/darm_mujoco/darm_training


## Run

In [17]:
# TODO:
# change: name
# change: checkpoint_freq

sync_config = tune.SyncConfig()

# Run-level settings (stop criterion, output directory, checkpoint policy) are
# read from the same dict that is logged to W&B, so the logged values and the
# values actually used stay in sync.
run_settings = wandb_init["config"]

tuner = tune.Tuner(
    "SAC",
    param_space=config,
    run_config=air.RunConfig(
        name=run_name,
        sync_config=sync_config,
        stop={"episode_reward_mean": run_settings["stop_episode_reward_mean"]},
        local_dir=run_settings["run_local_dir"],
        checkpoint_config=air.CheckpointConfig(
            checkpoint_at_end=run_settings["checkpoint_at_end"],
            checkpoint_score_attribute=run_settings["checkpoint_score_attribute"],  # or leave to save last chkpts
            checkpoint_score_order=run_settings["checkpoint_score_order"],
            checkpoint_frequency=run_settings["checkpoint_frequency"],
            num_to_keep=run_settings["num_checkpoints_to_keep"],
        ),
        callbacks=[
            WandbLoggerCallback(
                project="DARM",
                # SECURITY: never commit the W&B API key to the notebook. It is
                # read from the WANDB_API_KEY environment variable instead; when
                # that is unset, None is passed and the callback is expected to
                # fall back to an existing `wandb login` session.
                # The key that was previously hard-coded here must be revoked.
                api_key=os.environ.get("WANDB_API_KEY"),
                save_checkpoints=run_settings["save_checkpoints_to_wandb"],
                **wandb_init,
            )
        ],
    ),
)

# Blocks until the stop criterion is met or the run is interrupted.
results = tuner.fit()

0,1
Current time:,2023-03-04 21:39:54
Running for:,00:03:59.69
Memory:,5.6/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_darm_DarmHand-v0_299b4_00000,RUNNING,192.168.93.36:4924,12,223.523,12024,-10.9323,-0.627997,-40.2694,2.43309


[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(SAC pid=4924)[0m 2023-03-04 21:35:59,874	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m [2023-03-04 21:36:00,254 E 4146 4196] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-03-04_21-33-58_015046_3747 is over 95% full, available space: 322654208; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[36m(RolloutWorker pid=5061)[0m   logger.warn(
[2m[36m(RolloutWorker pid=5061)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


[2m[36m(RolloutWorker pid=5061)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=5060)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=5060)[0m   logger.warn(
[2m[36m(RolloutWorker pid=5060)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=5062)[0m   logger.warn(
[2m[36m(RolloutWorker pid=5062)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


[2m[36m(RolloutWorker pid=5062)[0m Loaded XML file successfully


[2m[33m(raylet)[0m [2023-03-04 21:36:10,261 E 4146 4196] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-03-04_21-33-58_015046_3747 is over 95% full, available space: 315428864; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(SAC pid=4924)[0m Loaded XML file successfully


[2m[36m(SAC pid=4924)[0m   logger.warn(
[2m[36m(SAC pid=4924)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(SAC pid=4924)[0m 2023-03-04 21:36:10,726	INFO trainable.py:172 -- Trainable.setup took 10.854 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=5061)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=5060)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[36m(RolloutWorker pid=5062)[0m   return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
[2m[33m(raylet)[0m [2023-03-04 21:36:20,273 E 4146 4196] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-03-04_21-33-58_015046_3747 is over 95% full, available space: 315437056; capacity: 31845081088. Object creation will fail if spilling is required.


Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_darm_DarmHand-v0_299b4_00000,12024,"{'num_env_steps_sampled': 12024, 'num_env_steps_trained': 0, 'num_agent_steps_sampled': 12024, 'num_agent_steps_trained': 0}",{},2023-03-04_21-39-54,False,2.43309,{},-0.627997,-10.9323,-40.2694,411,4942,6ae5418bf11c42359a455754c8e596d2,Daniel,"{'learner': {}, 'num_env_steps_sampled': 12024, 'num_env_steps_trained': 0, 'num_agent_steps_sampled': 12024, 'num_agent_steps_trained': 0}",12,192.168.93.36,12024,0,12024,1002,0,0,0,3,0,0,0,"{'cpu_util_percent': 84.51212121212122, 'ram_util_percent': 73.81818181818181}",4924,{},{},{},"{'mean_raw_obs_processing_ms': 1.7189555971835808, 'mean_inference_ms': 2.836624851613868, 'mean_action_processing_ms': 0.23748462305688628, 'mean_env_wait_ms': 38.10354625510331, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -0.6279969624493544, 'episode_reward_min': -40.26942496194788, 'episode_reward_mean': -10.93225218503461, 'episode_len_mean': 2.4330900243309004, 'episode_media': {}, 'episodes_this_iter': 411, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-17.03337006494612, -10.086573086192306, -9.268594942861899, -6.954664672300197, -6.2236959227014275, -7.679747960577737, -7.758153312554607, -29.44465157434471, -9.277773504381287, -17.761391827349637, -6.233761240765208, -8.649807442781904, -8.551400925706773, -27.7820039473289, -7.824844616760191, -20.174993567816642, -7.91681248377658, -5.510035827726778, -10.245725374472078, -17.844557599166656, -7.017579551837032, -7.762971965385435, -20.24543469772737, -9.338436582342744, -7.839800297220227, -10.083396272589024, -15.444102096284887, -7.688779691858089, -8.638286065361845, -19.378276304195314, -9.455063900099539, -17.038117944233942, -19.36255033276476, -8.574741034840045, -9.433590224483208, -7.678588062975199, -9.367658256111929, -10.233336948138898, -7.023678510654687, -10.083560056929397, -19.374456540714316, -10.167248949803916, 
-10.182530581929406, -6.231585420180993, -6.208036844525813, -7.763635413843662, -29.553677620919213, -29.550735804168312, -7.754949511061408, -10.24697058086721, -7.828799971304425, -8.552774263234854, -8.569781723462446, -8.55037539885793, -7.093398412294518, -7.758775541576667, -19.37048110471067, -7.67918243500826, -20.16619664267265, -7.0300905218714185, -10.164944811804252, -9.280963691696602, -8.46552777423611, -7.759154896610403, -9.273504131710144, -15.758448482695309, -6.955308513568548, -7.752248662847352, -17.837941558601578, -6.149250674668578, -7.0677679933154005, -8.561415959151843, -10.15721429025697, -9.453731101558642, -17.769403392633514, -10.15026351248632, -20.19095780151129, -7.040281250288297, -18.571760413250338, -1.4795049233229944, -9.359152974053016, -7.7491988687938775, -6.07196448905806, -7.757052891300494, -6.87740428677129, -7.756627403961076, -8.550000120693483, -7.749370042864334, -19.472415221149028, -19.289760006381375, -9.287997533676073, -3.891503522923161, -5.517328458551317, -4.622776350036059, -16.96102270950553, -7.755648147192817, -7.8213146618005105, -7.013948104752407, -7.753115705792641, -9.451659674659496, -10.255065403925736, -20.172366669667504, -17.81882875077941, -6.969449821710398, -19.375685149025784, -28.572416441240843, -8.644929935615243, -6.149023546310488, -6.955664485728811, -9.419328122570839, -10.089942808404484, -10.173326632348518, -7.767094374710161, -4.068731612153817, -10.182677992621812, -7.677311341536854, -20.175461385904484, -7.850778585708856, -2.941361223293904, -29.378996880798656, -9.282946914313474, -8.563530727644062, -10.164071499352971, -8.64829592232986, -8.473366697333978, -3.833890066166407, -3.9030908000348212, -9.376363569114531, -5.4228670841716085, -4.624659539209944, -9.374902921228959, -9.37151768711921, -10.177476215312666, -7.742808225994112, -7.055249552566452, -17.834914922391608, -6.154661855195046, -16.253946162607896, -8.475435921393625, -16.960216242267457, 
-19.294150801204257, -9.343253909274043, -10.172020104232843, -5.4157141618179025, -14.641915419310713, -30.173325671701786, -6.96287121275542, -7.029263145708169, -20.169229987513376, -5.49637708821329, -29.457972612441136, -7.828714100256035, -5.428381301551203, -6.217384752194282, -19.381429671168192, -19.370348269787215, -9.344882437193615, -9.368450925096466, -4.7224054652795235, -9.603455143583313, -9.369350723130419, -7.679498559376299, -8.47045894715634, -7.761699902191102, -8.562877040921514, -5.4954067251868, -9.284535315192182, -10.088033700492714, -10.09057709582767, -6.228844323059463, -18.65009842518621, -8.567079679160884, -7.050397622817814, -9.279137771413414, -8.644854303748073, -13.94117502834552, -7.040431511719646, -27.86102424649853, -8.546341449531667, -7.29216503787972, -19.38668323093516, -20.19044802720978, -6.961508394813719, -9.421424728086954, -20.193561277738674, -7.032242650389627, -5.0763974064228545, -6.3017913782594475, -9.282536470204475, -9.283584473981524, -3.898481033394896, -10.08821364881608, -8.564388867448312, -9.37024751379935, -7.115836363596154, -10.165887338820285, -18.578145767301404, -10.689748210113287, -6.219586938403761, -15.367626457140533, -7.911172697955275, -8.547069408137547, -10.069106386075008, -6.151950574236789, -10.159747726459633, -19.459430294725706, -20.16148685446951, -8.545540285027819, -8.566125050041885, -10.084428772586213, -9.344866696522812, -16.235397413526236, -9.367769546286798, -40.26942496194788, -5.428648197005905, -5.339355461936919, -10.146859692562998, -8.55932904766389, -27.209452107011163, -6.310434930708391, -4.638057597425527, -9.3645099421127, -4.551615115577184, -5.352632293427773, -29.394743037085, -8.55619013787139, -5.353757650384619, -9.278413856729323, -8.56482698105992, -7.681904317505049, -9.366232316860271, -10.176660107686573, -9.281082264643134, -30.255178156827103, -7.826578038321564, -6.9475012489153825, -13.891954684510413, -8.562251056914127, -9.282014010966533, 
-6.329104850266169, -6.233825252158622, -18.562668922410143, -7.033818422975082, -17.768123181201315, -8.571131075167779, -7.756876240647562, -9.279957551619107, -6.234732331543457, -7.740820385649116, -6.968961657150157, -6.228704973627461, -17.93237405525704, -7.853985218176336, -19.28803546297811, -10.153540812209522, -10.164939985399386, -9.284336007293959, -9.3615164297842, -8.476432221281868, -18.639012204869818, -17.037801753600462, -9.365762947565129, -18.6510110070992, -19.432624901729163, -8.555459229123056, -8.483939574356624, -18.64253844590256, -24.639934705921185, -10.09786742003098, -17.768954379609603, -20.336733768351987, -9.373157211823932, -7.061886265843829, -6.310019407309875, -17.846162795249747, -7.676581905106263, -6.957857062266783, -10.092040646553574, -8.476844991308532, -8.555556479071159, -18.640576751688176, -19.482929295676975, -18.57549342829227, -18.57752521143565, -17.770239127573632, -17.98134458459658, -8.654110500522215, -7.749352071433886, -6.868733498607247, -8.548355652113422, -6.956928962840608, -10.166167908196114, -17.76663002428116, -9.279651995844237, -9.349474623526016, -19.287150914157884, -17.765848025014694, -8.746077405358147, -6.150955473827416, -20.18309795809838, -7.76091237593039, -8.569613424058806, -5.35442774961613, -6.15375393384058, -8.477821338087363, -5.348253202075426, -5.435586462810836, -6.239953895529134, -10.23539095117698, -7.683571217503787, -9.359883258392601, -9.443339761335913, -0.6279969624493544, -6.961387534538577, -10.26894154013454, -4.636329513527055, -10.164335022431043, -8.48468899297919, -8.566932911605265, -10.320121073808952, -8.562699784185785, -20.172368190497565, -6.949160806824289, -7.033843849194889, -4.618787467036579, -9.438605371828983, -9.362866936134585, -6.231969207157837, -8.638025575157947, -18.576696250799667, -9.350273543900132, -10.167345475908927, -8.559049967642158, -16.97549059048435, -7.038955384125698, -9.53349918027707, -8.550980486047402, -7.75183285258247, 
-8.54711149711578, -10.18432365291388, -20.099769830985526, -8.477296355188184, -10.176407763327095, -10.167827945982946, -10.093120626341795, -8.62124556017745, -9.45654285039434, -9.286537223111088, -8.722121453632095, -7.7642211347909535, -18.645290680213133, -3.8885668249587244, -18.638822192173187, -16.969316248646287, -20.16105420440168, -18.002556913891645, -6.971349603174012, -10.09051382099759, -5.521276077721586, -7.181773308861706, -19.372334023906106, -17.03293078019957, -10.159937804701507, -10.488083502068505, -3.115724695135807, -9.35201618910137, -8.918026102779326, -6.947102033994062, -6.234088782589271, -8.5574370311192, -9.276382874898076, -20.178271570730665, -17.78406237004711, -7.762014114293888, -9.448653531178612, -17.783876147086403, -8.562788269211609, -8.571612318983798, -19.364109993872837, -8.561014544537182, -19.378491945933582, -9.27582546068788, -6.32819706323409, -6.960392502978412, -10.082576491111514, -10.15631406853519, -25.448828986078304, -9.370797550223369, -7.762985427023413, -18.575351863742327, -2.2875958702516392, -9.666473692063859, -17.049444194152564, -9.442909100553962, -9.356759331620015, -9.364517055684376, -3.2651316133389656, -7.756917166643838, -7.108071655509557, -15.434812831315224, -7.829441354858219, -7.037796904877021, -6.969818243118125, -9.274795214450464, -10.234123997431263, -9.347094225506197, -17.820541461415775, -6.234950304433529, -10.088614827654073, -10.173046784405562, -8.470277457567676], 'episode_lengths': [3, 1, 1, 2, 3, 1, 2, 3, 1, 2, 3, 3, 2, 2, 3, 2, 4, 4, 3, 3, 3, 2, 3, 2, 3, 1, 3, 1, 3, 2, 3, 3, 2, 2, 3, 1, 2, 3, 3, 1, 2, 2, 2, 3, 3, 2, 4, 4, 2, 3, 3, 2, 2, 2, 4, 2, 2, 1, 2, 3, 2, 1, 1, 2, 1, 7, 2, 2, 3, 2, 3, 2, 2, 3, 2, 2, 2, 3, 2, 4, 2, 2, 1, 2, 1, 2, 2, 2, 3, 1, 1, 4, 4, 3, 2, 2, 3, 3, 2, 3, 3, 2, 3, 2, 2, 2, 3, 2, 2, 3, 1, 2, 2, 6, 2, 1, 2, 3, 2, 2, 1, 2, 2, 3, 1, 3, 4, 2, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, 1, 2, 1, 2, 2, 3, 3, 2, 2, 3, 2, 4, 3, 3, 3, 3, 2, 2, 2, 2, 4, 5, 2, 1, 1, 2, 2, 4, 
1, 1, 1, 3, 3, 2, 3, 1, 3, 4, 3, 3, 2, 6, 2, 2, 2, 3, 2, 3, 9, 4, 1, 1, 4, 1, 2, 2, 4, 2, 2, 4, 3, 2, 4, 2, 1, 2, 2, 3, 2, 2, 2, 1, 2, 3, 2, 3, 3, 2, 2, 2, 5, 4, 3, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 3, 3, 2, 4, 2, 1, 4, 3, 2, 3, 2, 2, 2, 1, 3, 2, 2, 3, 4, 3, 1, 2, 2, 1, 2, 1, 3, 3, 2, 3, 3, 2, 1, 3, 3, 1, 2, 4, 2, 3, 4, 3, 1, 2, 1, 1, 2, 3, 3, 2, 2, 2, 5, 3, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 4, 2, 2, 2, 2, 2, 2, 1, 2, 3, 3, 3, 1, 2, 3, 3, 2, 3, 3, 2, 1, 2, 4, 2, 2, 2, 3, 3, 3, 2, 3, 3, 2, 2, 2, 2, 2, 3, 4, 2, 2, 2, 2, 1, 1, 2, 2, 1, 3, 3, 1, 4, 2, 3, 4, 3, 2, 2, 5, 2, 1, 4, 5, 2, 3, 2, 6, 4, 2, 6, 2, 3, 2, 1, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 1, 4, 2, 1, 2, 3, 2, 2, 2, 4, 16, 3, 3, 2, 2, 6, 2, 4, 3, 3, 3, 2, 1, 3, 2, 3, 3, 1, 2, 1]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.7189555971835808, 'mean_inference_ms': 2.836624851613868, 'mean_action_processing_ms': 0.23748462305688628, 'mean_env_wait_ms': 38.10354625510331, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",223.523,22.9466,223.523,{'training_iteration_time_ms': 83.832},1677962394,0,12024,12,299b4_00000,10.8584


[2m[33m(raylet)[0m [2023-03-04 21:36:30,285 E 4146 4196] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-03-04_21-33-58_015046_3747 is over 95% full, available space: 318902272; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-03-04 21:36:40,292 E 4146 4196] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-03-04_21-33-58_015046_3747 is over 95% full, available space: 318791680; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-03-04 21:36:50,297 E 4146 4196] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-03-04_21-33-58_015046_3747 is over 95% full, available space: 318074880; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-03-04 21:37:00,303 E 4146 4196] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-03-04_21-33-58_015046_3747 is over 95% full, available space: 317464576; capacity: 

In [16]:
# Ensure wandb is syncing to the cloud
# cd to darm_training again if it is not

In [10]:
# TODO:
# change: experiment name

# Rebuild the Tuner for an interrupted run from the experiment directory
# (<run_local_dir>/<run_name>); trials that ended in an error are resumed too.
tuner = tune.Tuner.restore(
    path=f"{wandb_init['config']['run_local_dir']}/{run_name}",
    resume_errored=True,
)
tuner

2023-02-11 17:26:13,607	INFO experiment_analysis.py:795 -- No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.


<ray.tune.tuner.Tuner at 0x7fede99d9a90>

[2m[33m(raylet)[0m [2023-02-11 17:26:16,601 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061720064; capacity: 31845081088. Object creation will fail if spilling is required.


In [15]:
results = tuner.get_results()
results

<ray.tune.result_grid.ResultGrid at 0x7fcd58384b80>

In [16]:
# Get the best result based on a particular metric.
best_result = results.get_best_result(metric="episode_reward_mean", mode="max")
best_result

Result(metrics={'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 8.343955039978027, 'actor_loss': -5.076763153076172, 'critic_loss': 0.4612053632736206, 'alpha_loss': -0.850807249546051, 'alpha_value': 0.9030595, 'log_alpha_value': -0.101966895, 'target_entropy': -5.0, 'policy_t': -0.01997794397175312, 'mean_q': 2.0334110260009766, 'max_q': 2.8355112075805664, 'min_q': 1.038293480873108}, 'td_error': array([9.46030378e-01, 4.29627061e-01, 2.65497327e-01, 8.43869328e-01,
       1.09686172e+00, 7.66791701e-01, 7.26696014e-01, 5.70532084e-02,
       1.14584994e+00, 4.43507016e-01, 1.04901314e-01, 1.52089047e+00,
       6.52013183e-01, 8.16148460e-01, 1.08409297e+00, 2.61833251e-01,
       3.79876256e-01, 9.56449747e-01, 5.75677335e-01, 1.02149987e+00,
       1.76170349e-01, 9.50863540e-01, 7.04805613e-01, 3.25276971e-01,
       6.36387825e-01, 8.46629441e-01, 5.59558868e-02, 8.83865356e-03,
       

In [17]:
# Get the best checkpoint corresponding to the best result.
best_checkpoint = best_result.checkpoint
best_checkpoint

Checkpoint(local_path=/home/daniel/DARM/darm_mujoco/darm_training/results/Test_DARMSF_DELTA_TARGET/SAC_darm_DarmSFHand-v0_a1dbe_00000_0_2023-02-11_16-49-13/results/Test_DARMSF_DELTA_TARGET/SAC_darm_DarmSFHand-v0_ad8de_00000_0_2023-02-11_16-56-43/checkpoint_000011)

[2m[33m(raylet)[0m [2023-02-11 17:00:32,579 E 9254 9299] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_16-49-09_974082_9002 is over 95% full, available space: 1129005056; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:00:42,596 E 9254 9299] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_16-49-09_974082_9002 is over 95% full, available space: 1129254912; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:00:52,615 E 9254 9299] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_16-49-09_974082_9002 is over 95% full, available space: 1129238528; capacity: 31845081088. Object creation will fail if spilling is required.


In [11]:
# Get Algorithm from saved checkpoint
from ray.rllib.algorithms.algorithm import Algorithm

# Hand over the Checkpoint object itself instead of reading its private
# `_local_path` attribute, which is an implementation detail and is not
# guaranteed to be populated (e.g. for checkpoints that are not stored locally).
algo = Algorithm.from_checkpoint(best_checkpoint)
algo

2023-02-11 16:38:32,949	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m [2023-02-11 16:38:35,741 E 6815 6860] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_16-34-33_438228_6699 is over 95% full, available space: 1174347776; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(RolloutWorker pid=7501)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=7501)[0m   logger.warn(
[2m[36m(RolloutWorker pid=7500)[0m   logger.warn(


[2m[36m(RolloutWorker pid=7500)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=7502)[0m   logger.warn(


[2m[36m(RolloutWorker pid=7502)[0m Loaded XML file successfully


  logger.warn(


Loaded XML file successfully


SAC

In [11]:
# resume the interrupted run
tuner.fit()

2023-02-11 17:26:26,421	INFO trial_runner.py:688 -- A local experiment checkpoint was found and will be used to restore the previous experiment state.
2023-02-11 17:26:26,422	INFO trial_runner.py:825 -- Using following checkpoint to resume: /home/daniel/DARM/darm_mujoco/darm_training/results/Test_DARMSF_DELTA_TARGET/experiment_state-2023-02-11_17-23-28.json
2023-02-11 17:26:26,440	INFO tune.py:653 -- TrialRunner resumed, ignoring new add_experiment but updating trial resources.


0,1
Current time:,2023-02-11 17:29:20
Running for:,00:02:54.01
Memory:,6.3/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_darm_DarmSFHand-v0_6a944_00000,RUNNING,192.168.152.36:15703,13,227.817,13026,-179.588,-166.097,-189.684,100


[2m[33m(raylet)[0m [2023-02-11 17:26:26,618 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061683200; capacity: 31845081088. Object creation will fail if spilling is required.
[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:31,413	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[33m(raylet)[0m [2023-02-11 17:26:36,627 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061359616; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(RolloutWorker pid=15846)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=15844)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=15846)[0m   logger.warn(
[2m[36m(RolloutWorker pid=15844)[0m   logger.warn(


[2m[36m(RolloutWorker pid=15845)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=15845)[0m   logger.warn(
[2m[36m(SAC pid=15703)[0m   logger.warn(


[2m[36m(SAC pid=15703)[0m Loaded XML file successfully


[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:40,442	INFO trainable.py:790 -- Restored on 192.168.152.36 from checkpoint: /tmp/checkpoint_tmp_7f50b6e15e2c473dba807bf1d398566d
[2m[36m(SAC pid=15703)[0m 2023-02-11 17:26:40,442	INFO trainable.py:799 -- Current state after restoring: {'_iteration': 11, '_timesteps_total': None, '_time_total': 113.04964661598206, '_episodes_total': 114}
[2m[33m(raylet)[0m [2023-02-11 17:26:46,634 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061335040; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:26:56,640 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1061343232; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:06,648 E 14732 14777] (raylet) f

Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_darm_DarmSFHand-v0_6a944_00000,13026,"{'num_env_steps_sampled': 13026, 'num_env_steps_trained': 258304, 'num_agent_steps_sampled': 13026, 'num_agent_steps_trained': 258304, 'last_target_update_ts': 13026, 'num_target_updates': 1009}",{},2023-02-11_17-28-35,False,100,{},-166.097,-179.588,-189.684,9,132,2674246d3b814ef583cb37ca785123d2,Daniel,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 8.40356159210205, 'actor_loss': -4.885239601135254, 'critic_loss': 0.3069121241569519, 'alpha_loss': -2.5390048027038574, 'alpha_value': 0.7392387, 'log_alpha_value': -0.30213442, 'target_entropy': -5.0, 'policy_t': -0.029988128691911697, 'mean_q': 2.379087448120117, 'max_q': 3.1470589637756348, 'min_q': 1.5433847904205322}, 'td_error': array([7.4213958e-01, 1.5848637e-01, 6.0251343e-01, 9.3348145e-01,  7.2470105e-01, 6.5075898e-01, 7.4386942e-01, 4.2802992e+00,  4.9475217e-01, 2.1274698e-01, 1.5443254e-01, 2.0181298e-01,  4.8542452e-01, 4.9696553e-01, 3.7915547e+00, 8.3584547e-02,  8.3843565e-01, 7.5096285e-01, 6.2452388e-01, 2.4125576e-01,  7.7261329e-01, 2.6608777e-01, 3.3530772e-01, 2.6860654e-01,  1.5399015e-01, 7.0978558e-01, 7.8079522e-01, 1.0731530e-01,  8.8066232e-01, 1.1126903e+00, 3.6070585e-02, 6.7874563e-01,  7.5406009e-01, 4.2981052e-01, 1.1391871e+00, 3.9740098e-01,  1.0762990e+00, 8.4136343e-01, 5.8252001e-01, 4.0861154e-01,  5.6281984e-01, 2.7024639e-01, 6.9000638e-01, 8.6244369e-01,  5.7595563e-01, 7.2603118e-01, 5.9470689e-01, 2.7473211e-01,  5.6826186e-01, 2.4650784e+02, 9.8598832e-01, 7.3479068e-01,  6.1449623e-01, 1.2699622e+00, 7.5296319e-01, 2.8090358e-02,  9.4109213e-01, 8.2771111e-01, 4.2838442e-01, 3.8090675e+00,  4.7546709e-01, 2.4742079e-01, 4.1203547e-01, 7.3801911e-01,  1.0025257e+00, 6.7763782e-01, 6.7099619e-01, 8.6762822e-01,  5.6190348e-01, 8.8954902e-01, 8.1222010e-01, 8.6386180e-01,  7.6953566e-01, 1.0633967e+00, 5.9996891e-01, 5.3750610e-01,  7.0670819e-01, 4.9724150e-01, 3.3370614e-02, 
6.8903613e-01,  9.4764221e-01, 5.0915122e-02, 5.0027347e-01, 9.6055913e-01,  5.5192137e-01, 7.9515433e-01, 7.2671640e-01, 3.9931262e-01,  1.8239129e-01, 9.9649012e-01, 8.4206927e-01, 4.1600978e-01,  4.0527940e-01, 7.6102638e-01, 2.3393106e-01, 4.7766042e-01,  2.2459340e-01, 8.5827851e-01, 1.4306033e-01, 2.4650784e+02,  7.1198571e-01, 3.9922416e+00, 1.2246186e+00, 7.4194229e-01,  2.7496171e-01, 4.5212805e-02, 7.4664807e-01, 1.3847947e-02,  8.7445688e-01, 6.6402781e-01, 1.0255686e+00, 4.5125723e-01,  4.8755097e-01, 2.4650784e+02, 4.4124365e-01, 1.0487792e+00,  5.8346188e-01, 2.6959336e-01, 3.5287654e-01, 5.9907603e-01,  4.8603582e-01, 6.1551094e-01, 6.9831514e-01, 5.1433253e-01,  1.8200487e-01, 9.6122825e-01, 7.8497732e-01, 2.2768998e-01,  9.6964097e-01, 1.4972503e+00, 8.0229974e-01, 1.0484257e+00,  5.5421102e-01, 8.3084774e-01, 4.7661805e-01, 3.9173824e-01,  3.1396019e-01, 4.2802992e+00, 2.7052438e-01, 2.6957560e-01,  7.5368738e-01, 4.4456518e-01, 3.1527257e-01, 8.5121763e-01,  9.0664178e-01, 9.4629610e-01, 5.6297445e-01, 5.9285718e-01,  6.3104606e-01, 5.2718985e-01, 6.5370166e-01, 7.0399725e-01,  4.5417070e-02, 2.4650784e+02, 7.2803473e-01, 1.1245636e+00,  3.7708211e-01, 3.7433398e-01, 4.3422055e-01, 3.2808065e-01,  6.2305951e-01, 1.7103601e-01, 7.9449832e-01, 1.3040452e+00,  7.1471536e-01, 4.5487504e+00, 4.1272748e-01, 6.5745860e-01,  6.6768157e-01, 8.8028562e-01, 7.0535421e-01, 5.2402341e-01,  5.6226981e-01, 5.4202604e-01, 2.7826047e-01, 2.6031137e-01,  6.0549617e-02, 3.6561573e-01, 2.4650784e+02, 8.0606019e-01,  8.4074116e-01, 4.9388194e-01, 7.1800745e-01, 2.9282093e-02,  1.9090211e-01, 3.8544512e-01, 1.4638956e+00, 1.4547678e+00,  1.0922147e+00, 2.6176953e-01, 1.3020796e-01, 5.6222248e-01,  5.6339896e-01, 7.6045167e-01, 7.8438163e-01, 7.5755298e-01,  8.2661462e-01, 3.5743856e-01, 1.3571662e-01, 5.3244066e-01,  8.8719201e-01, 8.2828355e-01, 3.8229942e-01, 6.0678411e-01,  4.7898412e-01, 8.2518208e-01, 5.2971601e-01, 6.7987609e-01,  7.6182199e-01, 1.0264168e+00, 
6.2066817e-01, 9.0486789e-01,  4.7908902e-01, 1.1681950e-01, 7.6850456e-01, 3.1422675e-01,  9.3148047e-01, 9.5507002e-01, 8.3421135e-01, 5.6414163e-01,  4.1598296e-01, 5.0719857e-02, 9.6793044e-01, 1.4145180e+00,  1.4200950e-01, 8.1434751e-01, 7.0387411e-01, 8.6176515e-01,  6.2346458e-01, 1.4636874e-01, 3.2455921e-01, 1.5807381e+00,  5.9650755e-01, 7.9351628e-01, 1.6089365e+00, 7.5115800e-01,  5.8976293e-01, 4.7450304e-02, 6.6682827e-01, 7.1542680e-01,  4.6520185e-01, 3.4638846e-01, 7.5957966e-01, 4.9341345e-01,  4.8143768e-01, 1.2025452e-01, 6.0646594e-01, 1.1619196e+00,  2.7393532e-01, 8.4904301e-01, 2.5427663e-01, 7.0259297e-01,  5.2577734e-01, 2.9342413e-01, 6.1365223e-01, 9.0736806e-01],  dtype=float32), 'mean_td_error': 5.492199897766113, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 256.0, 'num_grad_updates_lifetime': 668.0, 'diff_num_grad_updates_vs_sampler_policy': 667.0}}, 'num_env_steps_sampled': 13026, 'num_env_steps_trained': 258304, 'num_agent_steps_sampled': 13026, 'num_agent_steps_trained': 258304, 'last_target_update_ts': 13026, 'num_target_updates': 1009}",2,192.168.152.36,13026,258304,13026,1002,258304,85504,0,3,0,0,85504,"{'cpu_util_percent': 54.76744186046512, 'ram_util_percent': 85.32209302325585}",15703,{},{},{},"{'mean_raw_obs_processing_ms': 1.3155451329359085, 'mean_inference_ms': 2.6820931912181267, 'mean_action_processing_ms': 0.25946855188663404, 'mean_env_wait_ms': 3.287473482817159, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -166.09740307927132, 'episode_reward_min': -189.6840973868966, 'episode_reward_mean': -179.5880893824829, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 9, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-187.3498569726944, -166.09740307927132, -172.9712873697281, -187.82146245241165, -176.65354753285646, -183.53197374939919, -176.7706963941455, -189.6840973868966, 
-175.4124795049429], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.3155451329359085, 'mean_inference_ms': 2.6820931912181267, 'mean_action_processing_ms': 0.25946855188663404, 'mean_env_wait_ms': 3.287473482817159, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",114.767,62.318,227.817,"{'training_iteration_time_ms': 151.985, 'load_time_ms': 0.246, 'load_throughput': 1042265.409, 'learn_time_ms': 25.824, 'learn_throughput': 9913.287, 'synch_weights_time_ms': 6.049}",1676132915,0,13026,13,6a944_00000,9.03385


[34m[1mwandb[0m: Adding directory to artifact (/home/daniel/DARM/darm_mujoco/darm_training/results/Test_DARMSF_DELTA_TARGET/SAC_darm_DarmSFHand-v0_6a944_00000_0_2023-02-11_17-23-28/checkpoint_000012)... Done. 0.0s
[2m[33m(raylet)[0m [2023-02-11 17:27:36,665 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055997952; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:46,672 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055973376; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-11 17:27:56,678 E 14732 14777] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-11_17-23-24_266872_14581 is over 95% full, available space: 1055977472; capacity: 31845081088. Object creation will fai

<ray.tune.result_grid.ResultGrid at 0x7fedc40a1d90>