## Installation and Imports

In [1]:
# Print the directory the notebook is currently running from.
!pwd

/home/daniel/DARM/darm_mujoco/darm_training


In [2]:
# Environment configuration for this notebook.

# TODO: change path
import os

# Absolute location of the darm_mujoco checkout on this machine.
os.environ.update({"DARM_MUJOCO_PATH": "/home/daniel/DARM/darm_mujoco"})

In [None]:
!pip install ray[rllib] torch
!pip install wandb
!pip install tensorflow_probability

In [4]:
# Enter the darm_mujoco checkout and install the darm_gym_env package from its setup.py.
# NOTE(review): `!python` may resolve to a different interpreter than the kernel's —
# confirm the package ends up in the environment this notebook imports from.
# NOTE(review): the path duplicates the value stored in DARM_MUJOCO_PATH; keep them in sync.
%cd /home/daniel/DARM/darm_mujoco
!python setup.py install

/home/daniel/DARM/darm_mujoco
running install
running bdist_egg
running egg_info
writing darm_gym_env.egg-info/PKG-INFO
writing dependency_links to darm_gym_env.egg-info/dependency_links.txt
writing requirements to darm_gym_env.egg-info/requires.txt
writing top-level names to darm_gym_env.egg-info/top_level.txt
reading manifest file 'darm_gym_env.egg-info/SOURCES.txt'
writing manifest file 'darm_gym_env.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build/bdist.linux-x86_64/egg
creating build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/darm_sf_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/__init__.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/multi_darm_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_env/darm_gym.py -> build/bdist.linux-x86_64/egg/darm_gym_env
copying build/lib/darm_gym_e

In [None]:
# Check if mujoco import is successful
import mujoco

In [None]:
# If mujoco import fails, update pandas and restart runtime
!pip install pandas -U

In [3]:
import ray
from ray.rllib.algorithms.sac import SACConfig
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback

import gym
from darm_gym_env import DARMSFEnv

## Register Environment with RLlib

In [9]:
# env_creator = lambda env_config: gym.make("darm/DarmSFHand-v0", render_mode=None, hand_name="hand1") # DARMSFEnv(render_mode=None, reaction_time=0.08, hand_name="hand1") # 

def make_env(env_config):
    """Build a time-limited single-finger DARM environment.

    Args:
        env_config: Optional mapping of overrides supplied by the caller of the
            registered creator. Recognised keys, each falling back to the value
            that used to be hard-coded here:
            ``render_mode`` (None), ``reaction_time`` (0.08),
            ``hand_name`` ("hand1") and ``max_episode_steps`` (100).
            ``None`` or an empty mapping selects all defaults.

    Returns:
        A ``DARMSFEnv`` wrapped in ``gym.wrappers.TimeLimit`` so that episodes
        are cut off after ``max_episode_steps`` steps.
    """
    # Tolerate a missing config so the function can also be called by hand.
    options = env_config or {}
    base_env = DARMSFEnv(
        render_mode=options.get("render_mode", None),
        reaction_time=options.get("reaction_time", 0.08),
        hand_name=options.get("hand_name", "hand1"),
    )
    env = gym.wrappers.TimeLimit(
        env=base_env,
        max_episode_steps=options.get("max_episode_steps", 100),
    )
    return env
def env_creator(env_config):
    """Creator callback handed to RLlib; simply defers to ``make_env``."""
    return make_env(env_config)


# Make the environment available to RLlib under this id.
register_env("darm/DarmSFHand-v0", env_creator)

## Configure and Run

In [10]:
# TODO:
# change: rollout_workers
# change: gpu

# Start from RLlib's SAC defaults and override one settings group at a time.
config = SACConfig()

# Environment: the id registered above, with actions normalised by RLlib.
config = config.environment(
    env="darm/DarmSFHand-v0",
    normalize_actions=True,
)

# Learning: critic and actor use the same two-layer ReLU network shape.
config = config.training(
    q_model_config=dict(fcnet_activation="relu", fcnet_hiddens=[256, 256]),
    policy_model_config=dict(fcnet_activation="relu", fcnet_hiddens=[256, 256]),
    tau=0.005,
    target_entropy="auto",
    n_step=1,  # N-step target updates (1 = single-step TD targets)
    train_batch_size=256,
    target_network_update_freq=1,
    replay_buffer_config=dict(type="MultiAgentPrioritizedReplayBuffer"),
    num_steps_sampled_before_learning_starts=10_000,
    optimization_config=dict(
        actor_learning_rate=3e-4,
        critic_learning_rate=3e-4,
        entropy_learning_rate=3e-4,
    ),
    clip_actions=False,
)

# Sampling: three workers, one env step per fragment, tolerant of worker/env crashes.
config = config.rollouts(
    num_rollout_workers=3,
    rollout_fragment_length=1,
    recreate_failed_workers=True,
    num_consecutive_worker_failures_tolerance=10,
    restart_failed_sub_environments=True,
)

# CPU-only training.
config = config.resources(num_gpus=0)

# Evaluate every 100 training iterations (each iteration samples >= 1000 timesteps, see below).
config = config.evaluation(evaluation_interval=100)

config = config.reporting(
    min_sample_timesteps_per_iteration=1000,
    metrics_num_episodes_for_smoothing=5,
)

config = config.framework(framework="torch")

In [11]:
# TODO:
# change: rollout_workers
# change: gpu
# change: tags
# change: name

# Keyword arguments forwarded to the W&B logger callback when the run is created.
wandb_init = {
    "save_code": True,
    # Hyperparameters recorded with the run; these are entered by hand, so keep
    # them in sync with the values used in the algorithm config.
    "config": {
        "env": "DARMSFHand-v0",

        "actor_learning_rate": 0.0003,
        "critic_learning_rate": 0.0003,
        "entropy_learning_rate": 0.0003,
        "framework": "torch",

        "num_rollout_workers": 3,
        "num_gpu": 0,
        "metrics_num_episodes_for_smoothing": 5,
    },
    "tags": ["single_finger"],
    "notes": "Fixed the env to use targets that are delta increaments from the starting state. Removed velocity penalty, and used only effort penalty",
    "name": "Test_DARMSF_DELTA_TARGET",
    # Not set at the moment:
    # job_type=
    # monitor_gym=
}

In [None]:
%cd /home/daniel/DARM/darm-mujoco/darm_training/

In [13]:
# TODO: 
# change: name

# Launch one SAC trial through Ray Tune and mirror its metrics and checkpoints to W&B.
#
# The W&B API key is read from the WANDB_API_KEY environment variable instead of being
# embedded in the notebook. When the variable is unset, None is passed and the callback
# falls back to an existing `wandb login` session.
# NOTE: any key that was ever saved inside a notebook should be treated as leaked and
# rotated in the W&B account settings.
tuner = tune.Tuner(
    "SAC",
    run_config=air.RunConfig(
        name="Test_DARMSF_DELTA_TARGET",
        # The trial stops as soon as either criterion is met.
        stop={"training_iteration": 10_000, "episode_reward_mean": 200},
        checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True),
        callbacks=[
            WandbLoggerCallback(
                project="DARM",
                api_key=os.environ.get("WANDB_API_KEY"),
                save_checkpoints=True,
                **wandb_init,
            )
        ],
        # Relative path: resolved against the notebook's current working directory.
        local_dir="./results",
    ),
    param_space=config,
)

results = tuner.fit()

2023-02-10 10:14:20,826	INFO wandb.py:250 -- Already logged into W&B.


0,1
Current time:,2023-02-10 10:20:23
Running for:,00:06:02.71
Memory:,5.8/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_darm_DarmSFHand-v0_4d890_00000,RUNNING,192.168.152.36:5853,15,324.493,15030,-141.846,248.713,-192.044,90.1


[2m[33m(raylet)[0m [2023-02-10 10:14:24,662 E 4970 5022] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_10-10-12_403153_4748 is over 95% full, available space: 1222045696; capacity: 31845081088. Object creation will fail if spilling is required.
[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(SAC pid=5853)[0m 2023-02-10 10:14:25,417	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=5990)[0m   logger.warn(


[2m[36m(RolloutWorker pid=5990)[0m Loaded XML file successfully
[2m[36m(RolloutWorker pid=5989)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=5989)[0m   logger.warn(


[2m[36m(RolloutWorker pid=5991)[0m Loaded XML file successfully


[2m[36m(RolloutWorker pid=5991)[0m   logger.warn(


[2m[36m(SAC pid=5853)[0m Loaded XML file successfully


[2m[36m(SAC pid=5853)[0m   logger.warn(
[2m[33m(raylet)[0m [2023-02-10 10:14:34,668 E 4970 5022] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_10-10-12_403153_4748 is over 95% full, available space: 1221922816; capacity: 31845081088. Object creation will fail if spilling is required.


Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_darm_DarmSFHand-v0_4d890_00000,15030,"{'num_env_steps_sampled': 15030, 'num_env_steps_trained': 429312, 'num_agent_steps_sampled': 15030, 'num_agent_steps_trained': 429312, 'last_target_update_ts': 15030, 'num_target_updates': 1677}",{},2023-02-10_10-19-58,False,90.1,{},248.713,-141.846,-192.044,10,154,8dd39779c7ef477aa164e54f946852e0,Daniel,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 8.403343200683594, 'actor_loss': -6.434066295623779, 'critic_loss': 0.224114790558815, 'alpha_loss': -4.229072570800781, 'alpha_value': 0.60455614, 'log_alpha_value': -0.50326073, 'target_entropy': -5.0, 'policy_t': -0.027431469410657883, 'mean_q': 4.410746097564697, 'max_q': 5.1180830001831055, 'min_q': 3.604030132293701}, 'td_error': array([3.76369953e-01, 2.18138933e-01, 8.90897512e-01, 5.00837326e-01,  1.56644344e-01, 1.15910602e+00, 3.58983755e-01, 9.73417044e-01,  4.54557180e-01, 3.18669558e-01, 2.47295618e-01, 3.98333549e-01,  5.52456379e-02, 4.16686535e-01, 5.52578926e-01, 8.20993423e-01,  4.80461121e-02, 5.51123619e-01, 3.48044634e-01, 4.45616722e-01,  6.57121658e-01, 3.01966190e-01, 4.55074072e-01, 5.81701994e-01,  4.43889856e-01, 3.61741066e-01, 6.02926254e-01, 6.10968113e-01,  2.43805664e+02, 6.97162867e-01, 1.34469032e-01, 5.01981974e-01,  5.75793648e+00, 3.33922148e-01, 2.84593582e-01, 8.42299938e-01,  3.37388039e-01, 4.18749571e-01, 5.26380062e-01, 4.35695648e-01,  4.50320482e-01, 6.56141853e+00, 9.19794083e-01, 4.95012522e-01,  1.86808348e-01, 2.88064480e-01, 9.34903622e-02, 5.33180237e-01,  7.46445179e-01, 3.29239368e-01, 4.63973522e-01, 5.52854300e-01,  5.59043884e-01, 5.79916573e+00, 3.95936966e-01, 3.83873940e-01,  7.85346985e-01, 3.67263794e-01, 5.48842669e-01, 6.90719604e-01,  3.13124657e-02, 7.84007549e-01, 4.98735189e-01, 4.51488495e-01,  2.78318405e-01, 8.23536158e-01, 5.27554512e-01, 1.09366131e+00,  2.44073761e+02, 5.64733505e-01, 1.87523842e-01, 8.82987976e-01,  4.84059095e-01, 6.78997517e-01, 
8.01151276e-01, 6.06491089e-01,  4.78818417e-01, 3.75759125e-01, 5.04173756e-01, 1.41024947e-01,  2.97756195e-01, 6.70681572e+00, 6.38502359e-01, 1.84234142e-01,  3.44598055e-01, 2.66064882e-01, 6.22176886e-01, 2.91838408e-01,  6.30319595e-01, 3.90557766e-01, 7.24813938e-01, 6.66464329e-01,  6.48372173e-01, 1.69317245e-01, 5.71012974e-01, 2.15357304e-01,  4.80374098e-01, 4.54566240e-01, 6.62336826e-01, 5.37418842e-01,  2.28084564e-01, 1.80234909e-01, 3.18274736e-01, 5.42303801e-01,  2.43873016e+02, 3.50508213e-01, 6.72996044e-01, 3.73209953e-01,  8.81858826e-01, 4.57011461e-01, 6.31304979e-01, 3.22345257e-01,  4.37150240e-01, 8.15716267e-01, 6.95113182e-01, 5.28904438e-01,  6.64132166e+00, 3.57316017e-01, 7.54707336e-01, 5.63272238e-01,  8.61629725e-01, 1.76108837e-01, 8.31902027e-02, 1.03891134e-01,  3.71709824e-01, 5.50470591e-01, 7.84245253e-01, 3.21080923e-01,  3.92820835e-02, 5.49223900e-01, 6.91530704e-01, 3.57518673e-01,  3.84907722e-01, 4.12734747e-01, 4.18194771e-01, 7.22610235e-01,  3.24485064e-01, 5.38154554e+00, 7.06630945e-01, 1.67124152e-01,  6.63487911e-01, 8.02201986e-01, 2.52504587e-01, 9.75709677e-01,  4.89573479e-01, 6.64892435e-01, 1.03562427e+00, 3.95295620e-02,  5.00542164e-01, 9.13107872e-01, 5.89080334e-01, 6.50176048e-01,  6.37590694e+00, 8.55698586e-02, 3.50122690e-01, 6.35562181e-01,  3.89072657e-01, 4.41820145e-01, 2.79556274e-01, 1.20183945e+00,  5.35343766e-01, 5.90878248e-01, 6.06690645e-01, 7.29044676e-01,  2.38976479e-01, 3.35858583e-01, 5.22370100e-01, 6.08634281e+00,  6.85944557e-01, 4.10863638e-01, 3.76061916e-01, 3.61318350e-01,  1.09372520e+00, 6.78944349e-01, 5.75511694e-01, 3.01788330e-01,  2.70901680e-01, 3.42419863e-01, 3.19597006e-01, 1.56586289e-01,  6.28098488e-01, 8.64251852e-01, 3.41733932e-01, 7.75881290e-01,  2.96623230e-01, 3.41565371e-01, 2.20751286e-01, 3.47970247e-01,  9.26482439e-01, 5.77410221e-01, 1.53335571e-01, 1.95543766e-02,  7.58130074e-01, 4.61048365e-01, 4.55544472e-01, 4.03303385e-01,  5.41186571e-01, 
6.24453068e-01, 4.88040209e-01, 4.26761150e-01,  1.01890087e-01, 8.34158182e-01, 6.50000811e-01, 5.72202206e-01,  6.71605349e-01, 5.71026325e-01, 4.86349344e-01, 3.68950129e-01,  3.34364176e-01, 7.52686977e-01, 3.25332880e-01, 3.64549398e-01,  2.07561970e-01, 7.46529102e-01, 1.55097723e-01, 4.25298929e-01,  8.42193842e-01, 2.43873016e+02, 2.43873016e+02, 2.82555580e-01,  4.45864916e-01, 4.59903955e-01, 5.92770100e-01, 8.09845448e-01,  8.23133469e-01, 5.77309561e+00, 5.96422911e-01, 7.31289148e-01,  3.31299424e-01, 5.85563135e+00, 5.36827183e+00, 3.30348015e-01,  5.50782681e-02, 8.04073811e-02, 6.67590857e-01, 6.92529202e-01,  3.84135246e-01, 3.31759453e-01, 3.52068186e-01, 6.84356213e-01,  6.38411427e+00, 2.44073761e+02, 9.02929783e-01, 6.10304737e+00,  5.87041140e-01, 3.18532944e-01, 5.26460886e-01, 5.01149416e-01,  3.49880457e-01, 4.51208830e-01, 1.33623719e-01, 1.37279153e-01,  4.75117207e-01, 4.51113939e-01, 3.33072662e-01, 4.37882900e-01],  dtype=float32), 'mean_td_error': 6.474668979644775, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 256.0, 'num_grad_updates_lifetime': 1677.0, 'diff_num_grad_updates_vs_sampler_policy': 1676.0}}, 'num_env_steps_sampled': 15030, 'num_env_steps_trained': 429312, 'num_agent_steps_sampled': 15030, 'num_agent_steps_trained': 429312, 'last_target_update_ts': 15030, 'num_target_updates': 1677}",15,192.168.152.36,15030,429312,15030,1002,429312,85504,0,3,0,0,85504,"{'cpu_util_percent': 46.60649350649351, 'ram_util_percent': 77.04415584415582}",5853,{},{},{},"{'mean_raw_obs_processing_ms': 1.2387199854807946, 'mean_inference_ms': 2.3233264583571662, 'mean_action_processing_ms': 0.2227478173887199, 'mean_env_wait_ms': 3.0315715951183035, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 248.7129012644291, 'episode_reward_min': -192.04356507956982, 'episode_reward_mean': -141.84599248990418, 'episode_len_mean': 90.1, 'episode_media': {}, 'episodes_this_iter': 10, 'policy_reward_min': {}, 'policy_reward_max': {}, 
'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-188.50081959366798, -187.97007030248642, -189.0042775273323, -170.24335712194443, 248.7129012644291, -185.3823484480381, -187.89898101985455, -192.04356507956982, -176.78319323062897, -189.34621383994818], 'episode_lengths': [100, 100, 100, 100, 1, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.2387199854807946, 'mean_inference_ms': 2.3233264583571662, 'mean_action_processing_ms': 0.2227478173887199, 'mean_env_wait_ms': 3.0315715951183035, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",324.493,55.609,324.493,"{'training_iteration_time_ms': 144.554, 'load_time_ms': 0.263, 'load_throughput': 971712.058, 'learn_time_ms': 23.944, 'learn_throughput': 10691.626, 'synch_weights_time_ms': 5.41}",1676020798,0,15030,15,4d890_00000,7.98138


[2m[33m(raylet)[0m [2023-02-10 10:14:44,673 E 4970 5022] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_10-10-12_403153_4748 is over 95% full, available space: 1221365760; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-10 10:14:54,678 E 4970 5022] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_10-10-12_403153_4748 is over 95% full, available space: 1221214208; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-10 10:15:04,684 E 4970 5022] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_10-10-12_403153_4748 is over 95% full, available space: 1221103616; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-10 10:15:14,690 E 4970 5022] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_10-10-12_403153_4748 is over 95% full, available space: 1220038656; capaci