## Installation and Imports

In [None]:
# Print the kernel's working directory; the package install step further down
# uses a path relative to it.
!pwd

In [None]:
# Configure env variables
import os

# Use setdefault so a DARM_MUJOCO_PATH already exported in the shell (e.g. on a
# different machine) wins over this machine-specific fallback path.
# NOTE(review): presumably read by the darm_gym_env package - confirm.
os.environ.setdefault("DARM_MUJOCO_PATH", "/home/daniel/DARM/darm_mujoco")

In [None]:
!python setup.py install

In [None]:
!pip install ray[rllib] torch

In [None]:
import ray
from ray.rllib.algorithms.sac import SACConfig
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback

import gym
from darm_gym_env import DARMSFEnv

## Register Environment with RLlib

In [None]:
def make_env(env_config):
    """Build a time-limited single-finger DARM environment for RLlib.

    Args:
        env_config: Optional mapping of overrides passed in by RLlib. May be
            None or empty. Recognised keys and their defaults:
            "render_mode" (None), "reaction_time" (0.08),
            "hand_name" ("hand1"), "max_episode_steps" (100).

    Returns:
        A ``DARMSFEnv`` wrapped in ``gym.wrappers.TimeLimit``.
    """
    overrides = env_config or {}
    base_env = DARMSFEnv(
        render_mode=overrides.get("render_mode", None),
        reaction_time=overrides.get("reaction_time", 0.08),
        hand_name=overrides.get("hand_name", "hand1"),
    )
    # TimeLimit truncates every episode after a fixed number of steps.
    return gym.wrappers.TimeLimit(
        env=base_env,
        max_episode_steps=overrides.get("max_episode_steps", 100),
    )


# Kept under its original name for any later cell that refers to it.
env_creator = make_env

register_env("darm/DarmSFHand-v0", env_creator)

## Configure and Run

In [None]:
# SAC configuration for the "darm/DarmSFHand-v0" environment registered above.
# NOTE(review): the captured run log reports ~91% RAM utilisation and a worker
# killed for memory pressure; replay_buffer_config sets only the buffer type
# here, so consider giving it an explicit, smaller "capacity" - confirm against
# the RLlib defaults.
config = (
    SACConfig()
    .environment(
        env="darm/DarmSFHand-v0",
        normalize_actions=True
    )
    .training(
        q_model_config={
            "fcnet_activation": "relu",
            "fcnet_hiddens": [256, 256]
        },
        policy_model_config={
            "fcnet_activation": "relu",
            "fcnet_hiddens": [256, 256]
        },
        tau=0.005,
        target_entropy="auto",
        n_step=1,  # N-step return horizon for the Q-targets (1 = one-step TD)
        train_batch_size=256,
        target_network_update_freq=1,
        replay_buffer_config={"type":"MultiAgentPrioritizedReplayBuffer"},
        num_steps_sampled_before_learning_starts=10_000,
        optimization_config={
          "actor_learning_rate": 0.0003,
          "critic_learning_rate": 0.0003,
          "entropy_learning_rate": 0.0003,
        },
        clip_actions=False
    )
    .rollouts(
        num_rollout_workers=3,
        rollout_fragment_length=1,
    )
    .resources(num_gpus=0)
    .evaluation(evaluation_interval=100)  # evaluate once every 100 training iterations
    .reporting(
        min_sample_timesteps_per_iteration=1000,
        metrics_num_episodes_for_smoothing=5
    )
    # Fixed seed so that repeated runs of this notebook are comparable.
    .debugging(seed=42)
    .framework(framework="torch")
)

In [None]:
# Keyword arguments forwarded to the Weights & Biases logger callback.
# job_type and monitor_gym are not set here.
wandb_init = {
    "save_code": True,
    # Hyperparameters recorded alongside the run.
    "config": dict(
        env="DARMSFHand-v0",
        actor_learning_rate=0.0003,
        critic_learning_rate=0.0003,
        entropy_learning_rate=0.0003,
        framework="torch",
        num_rollout_workers=3,
        num_gpu=0,
        metrics_num_episodes_for_smoothing=5,
    ),
    "tags": ["single_finger"],
    "notes": "Fixed the env to use targets that are delta increaments from the starting state. Removed velocity penalty, and used only effort penalty",
    "name": "Test_DARMSF_DELTA_TARGET",
}

In [None]:
# Stopping criteria handed to Tune for the experiment.
stop_criteria = {"training_iteration": 10_000, "episode_reward_mean": 200}

# Run-level settings: experiment name, final checkpoint, W&B logging and the
# local results directory.
run_config = air.RunConfig(
    name="Test_DARMSF_DELTA_TARGET",
    stop=stop_criteria,
    checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True),
    callbacks=[
        WandbLoggerCallback(project="DARM", save_checkpoints=True, **wandb_init),
    ],
    local_dir="./results",
)

tuner = tune.Tuner("SAC", run_config=run_config, param_space=config)

# Launch the experiment; blocks until Tune finishes.
results = tuner.fit()

2023-02-09 14:34:37,372	INFO worker.py:1538 -- Started a local Ray instance.
2023-02-09 14:34:38,879	INFO wandb.py:250 -- Already logged into W&B.


0,1
Current time:,2023-02-09 15:53:00
Running for:,01:18:21.71
Memory:,6.8/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_darm_DarmSFHand-v0_80340_00000,RUNNING,192.168.152.36:27220,97,4664.71,97193,-169.955,-156.183,-191.286,100


[2m[36m(SAC pid=27220)[0m 2023-02-09 14:34:42,995	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[33m(raylet)[0m [2023-02-09 14:34:47,263 E 26959 27004] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_14-34-35_158333_26854 is over 95% full, available space: 1199403008; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[36m(RolloutWorker pid=27324)[0m   logger.warn(


[2m[36m(RolloutWorker pid=27324)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=27325)[0m   logger.warn(


[2m[36m(RolloutWorker pid=27325)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=27327)[0m   logger.warn(


[2m[36m(RolloutWorker pid=27327)[0m Loaded XML file successfully
[2m[36m(SAC pid=27220)[0m Loaded XML file successfully


[2m[36m(SAC pid=27220)[0m   logger.warn(


Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_darm_DarmSFHand-v0_80340_00000,97193,"{'num_env_steps_sampled': 97193, 'num_env_steps_trained': 7473664, 'num_agent_steps_sampled': 97193, 'num_agent_steps_trained': 7473664, 'last_target_update_ts': 97193, 'num_target_updates': 29194}",{},2023-02-09_15-52-40,False,100,{},-156.183,-169.955,-191.286,10,1000,4d5d6cddaea7444681811f5901f7f828,Daniel,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 0.8870038390159607, 'actor_loss': 115.60578918457031, 'critic_loss': 1.3008311986923218, 'alpha_loss': 3.3117122650146484, 'alpha_value': 0.023906756, 'log_alpha_value': -3.7335942, 'target_entropy': -5.0, 'policy_t': -0.3005792200565338, 'mean_q': -115.54031372070312, 'max_q': -109.50465393066406, 'min_q': -120.56295013427734}, 'td_error': array([1.13542435e+02, 6.62643433e-01, 3.84193420e-01, 4.04376984e-01,  3.79165649e-01, 3.63325256e+02, 1.02663422e+00, 1.08526947e+02,  8.52428436e-01, 2.67391205e-01, 1.13053459e+02, 7.90931702e-01,  1.15930222e+02, 1.56780243e-01, 9.57183838e-02, 5.51700592e-01,  9.85862732e-01, 1.16890625e+02, 6.34422302e-01, 1.68270111e-01,  2.48256683e-01, 2.60269165e-01, 4.54902649e-02, 5.14106750e-01,  7.52212524e-01, 4.96482849e-02, 1.04898071e+00, 1.30870819e-01,  3.24466705e-01, 1.12543968e+02, 6.52294159e-01, 1.82029724e-01,  1.15194336e+02, 1.15242661e+02, 1.72790527e-01, 1.16183395e+02,  1.12319946e-01, 1.08547775e+02, 1.12725357e+02, 1.76364899e-01,  5.36037445e-01, 5.22018433e-01, 6.19785309e-01, 6.77795410e-02,  1.60896301e-01, 2.84992218e-01, 3.60715271e+02, 7.03918457e-01,  1.11521637e+02, 1.16034241e+02, 1.15409256e+02, 5.98651886e-01,  1.09491898e+02, 2.17414856e-01, 2.80864716e-01, 1.12301170e+02,  6.30035400e-01, 1.69792175e-01, 5.68473816e-01, 1.56742096e-01,  4.20944214e-01, 6.39896393e-01, 6.21528625e-01, 1.08990143e+02,  1.17043907e+02, 3.48239899e-01, 5.00732422e-01, 3.30955505e-01,  9.29294586e-01, 1.13521973e+02, 9.76715088e-02, 5.07930756e-01,  2.51140594e-01, 1.14244278e+02, 
1.84989929e-01, 1.98963165e-01,  1.11591339e-01, 8.99429321e-02, 2.18986511e-01, 4.63970184e-01,  1.30668640e-01, 1.16403625e+02, 2.11437225e-01, 2.52361298e-01,  6.95880890e-01, 2.94731140e-01, 3.07762146e-01, 1.14423386e+02,  7.44094849e-01, 1.53099060e-01, 1.15884354e+02, 1.15507591e+02,  1.11807495e+02, 6.12258911e-02, 9.84954834e-02, 5.05699158e-01,  1.10458817e+02, 2.41046906e-01, 1.08047546e+02, 1.15132843e+02,  2.78793335e-01, 4.09416199e-01, 1.00532532e-01, 5.74764252e-01,  7.16388702e-01, 6.46705627e-02, 4.20291901e-01, 4.80052948e-01,  3.20793152e-01, 1.14286972e+02, 1.14286972e+02, 1.15291824e+02,  9.17888641e-01, 1.12606812e+02, 5.15651703e-01, 1.15607834e+02,  3.62579346e-01, 3.30921173e-01, 1.14400803e+02, 1.54300690e-01,  8.18367004e-02, 8.77250671e-01, 6.35795593e-02, 6.02905273e-01,  7.32170105e-01, 4.81357574e-01, 8.31985474e-02, 2.05768585e-01,  3.88618469e-01, 1.60381317e-01, 6.68220520e-01, 7.24582672e-01,  2.82817841e-01, 3.60410980e+02, 6.78939819e-02, 2.81692505e-01,  9.40391541e-01, 1.39122009e-01, 6.88899994e-01, 3.60414825e+02,  5.34294128e-01, 5.04512787e-01, 5.32093048e-01, 1.23809814e-01,  3.35083008e-02, 4.74441528e-01, 5.89378357e-01, 7.02449799e-01,  1.11807495e+02, 6.36165619e-01, 1.16963196e+00, 2.71064758e-01,  1.10065132e+02, 1.39514923e-01, 1.13152039e+02, 1.13315826e+02,  8.44352722e-01, 2.78526306e-01, 2.74635315e-01, 1.17286682e-01,  1.17421654e+02, 2.34981537e-01, 1.12129211e-01, 2.08835602e-01,  5.10986328e-01, 6.67621613e-01, 4.90993500e-01, 5.03494263e-01,  1.22406006e-01, 1.11885162e+02, 5.89431763e-01, 1.17745712e+02,  2.37796783e-01, 1.41143799e-02, 1.42620087e-01, 2.13283539e-01,  1.11001411e+02, 2.42847443e-01, 2.56752014e-01, 4.49218750e-01,  1.15750092e+02, 1.16441170e+02, 8.91025543e-01, 1.10924149e+00,  5.74684143e-01, 3.86215210e-01, 8.78227234e-01, 7.45697021e-02,  1.02760315e-01, 3.60382690e+02, 4.63932037e-01, 3.97098541e-01,  8.73947144e-02, 8.66462708e-01, 1.09903160e+02, 6.47285461e-01,  1.37325287e-01, 
1.38500214e-01, 1.93820953e-01, 1.16685211e+02,  4.73865509e-01, 2.75562286e-01, 2.77450562e-01, 1.51493073e-01,  9.19700623e-01, 1.14127991e+02, 1.10660934e+00, 8.11199188e-01,  1.12721725e+02, 5.22426605e-01, 1.15798103e+02, 3.03764343e-01,  1.16767410e+02, 8.57696533e-02, 3.62219482e+02, 2.11601257e-02,  1.09108116e+02, 1.11954094e+02, 2.65693665e-01, 5.21541595e-01,  6.57390594e-01, 3.27301025e-02, 4.10167694e-01, 5.00926971e-01,  3.61498962e+02, 1.17914230e+02, 1.08547775e+02, 3.73958588e-01,  3.08380127e-01, 1.71718597e-01, 1.25091553e-01, 1.15794098e+02,  2.93128967e-01, 6.28166199e-02, 5.08365631e-01, 1.27315521e-01,  1.87259674e-01, 1.13718231e+02, 1.92008972e-01, 3.89247894e-01,  4.67525482e-01, 9.72621918e-01, 2.46196747e-01, 1.15798103e+02,  2.03926086e-01, 3.61800598e+02, 2.23030090e-01, 3.96842957e-02,  1.14257812e-01, 4.99626160e-01, 1.89544678e-01, 4.91458893e-01,  9.83413696e-01, 2.11513519e-01, 1.12231049e+02, 1.42784119e-01],  dtype=float32), 'mean_td_error': 37.32743453979492, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 256.0, 'num_grad_updates_lifetime': 29194.0, 'diff_num_grad_updates_vs_sampler_policy': 29193.0}}, 'num_env_steps_sampled': 97193, 'num_env_steps_trained': 7473664, 'num_agent_steps_sampled': 97193, 'num_agent_steps_trained': 7473664, 'last_target_update_ts': 97193, 'num_target_updates': 29194}",97,192.168.152.36,97193,7473664,97193,1002,7473664,85504,0,3,0,1,85504,"{'cpu_util_percent': 39.38055555555556, 'ram_util_percent': 91.3138888888889}",27220,{},{},{},"{'mean_raw_obs_processing_ms': 1.1364653920600976, 'mean_inference_ms': 2.3486854601514455, 'mean_action_processing_ms': 0.22231085102003592, 'mean_env_wait_ms': 2.9007591400230686, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -156.1826542466879, 'episode_reward_min': -191.28636541962624, 'episode_reward_mean': -169.9554263330996, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 10, 'policy_reward_min': {}, 'policy_reward_max': 
{}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-156.1826542466879, -169.85423006117344, -162.02638640999794, -185.1279215067625, -186.65800455212593, -163.31539402902126, -191.28636541962624, -157.43947839736938, -171.26552772521973, -156.39830098301172], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.1364653920600976, 'mean_inference_ms': 2.3486854601514455, 'mean_action_processing_ms': 0.22231085102003592, 'mean_env_wait_ms': 2.9007591400230686, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",4664.71,52.5495,4664.71,"{'training_iteration_time_ms': 161.318, 'load_time_ms': 0.312, 'load_throughput': 821657.349, 'learn_time_ms': 25.987, 'learn_throughput': 9851.041, 'synch_weights_time_ms': 7.023}",1675954360,0,97193,97,80340_00000,8.03539


[2m[33m(raylet)[0m [2023-02-09 14:34:57,270 E 26959 27004] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_14-34-35_158333_26854 is over 95% full, available space: 1199267840; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 14:35:07,276 E 26959 27004] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_14-34-35_158333_26854 is over 95% full, available space: 1199169536; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 14:35:17,287 E 26959 27004] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_14-34-35_158333_26854 is over 95% full, available space: 1199026176; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 14:35:27,295 E 26959 27004] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_14-34-35_158333_26854 is over 95% full, available space: 119884

[2m[36m(RolloutWorker pid=30579)[0m Loaded XML file successfully


[2m[33m(raylet)[0m [2023-02-09 15:49:10,121 E 26959 27004] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_14-34-35_158333_26854 is over 95% full, available space: 1185558528; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 15:49:20,130 E 26959 27004] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_14-34-35_158333_26854 is over 95% full, available space: 1185566720; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 15:49:30,136 E 26959 27004] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_14-34-35_158333_26854 is over 95% full, available space: 1185533952; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 15:49:37,384 E 26959 26959] (raylet) node_manager.cc:3097: 1 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other re