## Installation and Imports

In [None]:
# Check whether GCC is installed by asking the compiler for its version.
# The `!` prefix runs the command in a shell; if `gcc` is missing, run the
# installation cell below.
!gcc --version

In [None]:
# Install GCC if absent
!sudo apt update
!sudo apt install build-essential -y

In [None]:
# # Set up MuJoCo for gym
# !apt-get install -y \
#     libgl1-mesa-dev \
#     libgl1-mesa-glx \
#     libglew-dev \
#     libosmesa6-dev \
#     software-properties-common

# !apt-get install -y patchelf

# !pip install gym

# !pip install free-mujoco-py

# import mujoco_py
# import gym

In [None]:
!pip install ray[rllib] torch
!pip install tensorflow_probability
!pip install wandb

In [1]:
import ray
from ray.rllib.algorithms.es import ESConfig
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback

import gym

In [2]:
# Smoke test: confirm that gym can construct a MuJoCo environment.
# The bare last expression makes the notebook display the environment's repr.
# Alternative environment to probe (disabled):
# gym.make("HalfCheetah-v3")
gym.make("Reacher-v2")

<TimeLimit<ReacherEnv<Reacher-v2>>>

## Configure and Run

In [3]:
# Assemble the Evolution Strategies configuration one concern at a time.
config = ESConfig()

# Environment to train on.
config = config.environment(env="Reacher-v2")

# Rollout workers: parallelism plus fault-tolerance settings.
config = config.rollouts(
    num_rollout_workers=3,
    num_envs_per_worker=8,
    recreate_failed_workers=True,
    num_consecutive_worker_failures_tolerance=10,
    restart_failed_sub_environments=True,
)

# CPU-only run.
config = config.resources(num_gpus=0)

# Evaluation is currently disabled:
# config = config.evaluation(evaluation_interval=100)  # For 1000 timesteps iter; 100 evals

config = config.framework(framework="torch")

# Show the fully resolved configuration dictionary.
config.to_dict()

{'extra_python_environs_for_driver': {},
 'extra_python_environs_for_worker': {},
 'num_gpus': 0,
 'num_cpus_per_worker': 1,
 'num_gpus_per_worker': 0,
 '_fake_gpus': False,
 'custom_resources_per_worker': {},
 'placement_strategy': 'PACK',
 'eager_tracing': False,
 'eager_max_retraces': 20,
 'tf_session_args': {'intra_op_parallelism_threads': 2,
  'inter_op_parallelism_threads': 2,
  'gpu_options': {'allow_growth': True},
  'log_device_placement': False,
  'device_count': {'CPU': 1},
  'allow_soft_placement': True},
 'local_tf_session_args': {'intra_op_parallelism_threads': 8,
  'inter_op_parallelism_threads': 8},
 'env': 'Reacher-v2',
 'env_config': {},
 'observation_space': None,
 'action_space': None,
 'env_task_fn': None,
 'render_env': False,
 'clip_rewards': None,
 'normalize_actions': True,
 'clip_actions': False,
 'disable_env_checking': False,
 'num_envs_per_worker': 8,
 'sample_collector': ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector,
 'sample_as

In [7]:
# Keyword arguments that are forwarded to the Weights & Biases logger callback
# (unpacked with `**wandb_init` where the callback is constructed).
# NOTE(review): the `config` cell in this notebook sets env="Reacher-v2",
# while the metadata below records "HalfCheetah-v3" -- confirm which is intended.
wandb_init = {
    "save_code": True,
    # Hyperparameters recorded alongside the W&B run.
    "config": {
        "env": "HalfCheetah-v3",
        "num_rollout_workers": 3,
        "num_envs_per_worker": 8,
        "recreate_failed_workers": True,
        "num_consecutive_worker_failures_tolerance": 10,
        "restart_failed_sub_environments": True,
        "num_gpus": 0,
        "framework": "torch",
    },
    "tags": ["local"],
    "notes": "Test to inspect scaling on Vast.ai",
    "name": "ES_HalfCheetah_local",
    # Options left unset for now: job_type, monitor_gym
}

In [8]:
# Launch the ES experiment with Ray Tune and log results to Weights & Biases.
#
# SECURITY: no API key is passed in code. Authenticate beforehand, either by
# exporting the WANDB_API_KEY environment variable or by running `wandb login`;
# the W&B integration picks the credentials up from there. The key that was
# previously hard-coded in this cell must be considered leaked and revoked.
tuner = tune.Tuner(
    "ES",
    run_config=air.RunConfig(
        name="ES_HalfCheetah_local",
        # Stop the trial once the mean episode reward reaches this value.
        stop={"episode_reward_mean": 150},
        checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True),
        callbacks=[
            WandbLoggerCallback(
                project="HalfCheetah",
                save_checkpoints=True,
                **wandb_init,
            )
        ],
        local_dir="./results",
    ),
    param_space=config.to_dict(),
)

results = tuner.fit()

2023-02-13 01:19:28,098	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-02-13 01:35:47
Running for:,00:16:17.53
Memory:,6.3/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_len_mean,timesteps_this_iter
ES_HalfCheetah-v3_14e1e_00000,TERMINATED,192.168.84.35:6014,3,948.164,3006000.0,377.07,1000,1002000.0


[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(ES pid=6014)[0m 2023-02-13 01:19:34,601	INFO policy.py:1147 -- Policy (worker=local) running on CPU.
[2m[36m(ES pid=6014)[0m 2023-02-13 01:19:34,601	INFO torch_policy.py:184 -- Found 0 visible cuda devices.
[2m[36m(ES pid=6014)[0m 2023-02-13 01:19:34,630	INFO es.py:396 -- Creating shared noise table.
[2m[33m(raylet)[0m [2023-02-13 01:19:38,020 E 5740 5790] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-19-25_995520_5491 is over 95% full, available space: 116240384; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[33m(raylet)[0m [2023-02-13 01:19:48,027 E 5740 5790] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-19-25_995520_5491 is over 95% full, available space: 111587328; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[36m(ES pid=6014)[0m 2023-02-13 01:19:48,568	INFO es.py:401 -- Creating actors.
[2m[36m(ES pid=6014)[0m 2023-02-13 01:19:48,747	INFO trainable.py:172 -- Trainable.setup took 15.089 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(ES pid=6014)[0m 2023-02-13 01:19:48,796	INFO es.py:568 -- Collected 0 episodes 0 timesteps so far this iter
[2m[36m(Worker pid=6311)[0m 2023-02-13 01:19:57,725	INFO policy.py:1147 -- Policy (worker=local) running on CPU.
[2m[36m(Worker pid=6311)[0m 2023-02-13 01:19:57,725	INFO torch_policy.py:184 -- Found 0 visible cuda devices.
[2m[36m(Worker pid=6311)[0m   new_theta_dict[k] = torch.from_numpy(
[2m

Trial name,date,done,episode_len_mean,episode_reward_mean,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,perf,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_this_iter,timesteps_total,training_iteration,trial_id,warmup_time
ES_HalfCheetah-v3_14e1e_00000,2023-02-13_01-35-36,True,1000,377.07,,f6d671dd5c3a4f61903ff6aef3f964d4,Daniel,"{'weights_norm': 524.1114, 'grad_norm': 6.04469, 'update_ratio': 0.06821026, 'episodes_this_iter': 1002, 'episodes_so_far': 3006}",3,192.168.84.35,"{'cpu_util_percent': 79.84350797266515, 'ram_util_percent': 86.06241457858769}",6014,948.164,308.933,948.164,1676248536,3006000.0,1002000.0,3006000.0,3,14e1e_00000,15.1192


[2m[36m(ES pid=6014)[0m 2023-02-13 01:25:15,862	INFO filter_manager.py:34 -- Synchronizing filters ...
[2m[36m(ES pid=6014)[0m 2023-02-13 01:25:15,867	INFO filter_manager.py:55 -- Updating remote filters ...
[2m[36m(ES pid=6014)[0m 2023-02-13 01:25:15,899	INFO es.py:568 -- Collected 0 episodes 0 timesteps so far this iter
[2m[36m(ES pid=6014)[0m 2023-02-13 01:25:17,618	INFO es.py:568 -- Collected 6 episodes 6000 timesteps so far this iter
[2m[33m(raylet)[0m [2023-02-13 01:25:18,343 E 5740 5790] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-19-25_995520_5491 is over 95% full, available space: 110739456; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[36m(ES pid=6014)[0m 2023-02-13 01:25:19,623	INFO es.py:568 -- Collected 12 episodes 12000 timesteps so far this iter
[2m[36m(ES pid=6014)[0m 2023-02-13 01:25:21,416	INFO es.py:568 -- Collected 18 episodes 18000 timesteps so far this iter
[2m[36m(ES pid=6014)[0m 2023

[2m[33m(raylet)[0m [2023-02-13 01:35:38,896 E 5740 5790] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-19-25_995520_5491 is over 95% full, available space: 108421120; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[36m(Worker pid=6310)[0m 2023-02-13 01:35:47,098	ERROR actor_manager.py:177 -- Worker exception, recreating: 'Worker' object has no attribute 'stop'
[2m[36m(Worker pid=6310)[0m Traceback (most recent call last):
[2m[36m(Worker pid=6310)[0m   File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/rllib/utils/actor_manager.py", line 174, in apply
[2m[36m(Worker pid=6310)[0m     return func(self, *args, **kwargs)
[2m[36m(Worker pid=6310)[0m   File "/home/daniel/miniconda3/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 632, in <lambda>
[2m[36m(Worker pid=6310)[0m     lambda w: w.stop(), healthy_only=False, local_worker=True
[2m[36m(Worker pid=6310)[0m AttributeError: 'Worker'

In [5]:
# TODO:
# change: path
# change: experiment name


# Configure env variables
import os

# Root of the DARM MuJoCo checkout. `setdefault` keeps a value that is already
# exported in the environment and only falls back to the local default, so the
# notebook can run on another machine without editing this cell.
os.environ.setdefault("DARM_MUJOCO_PATH", "/home/daniel/DARM/darm_mujoco")

# Restore the interrupted run from its experiment directory; trials that
# errored are resumed as well (`resume_errored=True`).
tuner = tune.Tuner.restore(
    os.path.join(
        os.environ["DARM_MUJOCO_PATH"],
        "darm_training",
        "results",
        "ES_Reacher_vast_ai",
    ),
    resume_errored=True,
)
tuner

<ray.tune.tuner.Tuner at 0x7f5058734e20>

In [6]:
# Fetch the result grid of the experiment held by `tuner`.
results = tuner.get_results()

# Pick the trial with the highest mean episode reward.
best_result = results.get_best_result(
    mode="max",
    metric="episode_reward_mean",
)

# Checkpoint attached to that trial; displayed as the cell output.
best_checkpoint = best_result.checkpoint
best_checkpoint

In [3]:
# Build a fresh ES algorithm instance from `config`; trained weights are
# loaded afterwards with `algo.restore(...)`.
# NOTE(review): `config` targets "Reacher-v2" while the checkpoint restored
# later comes from a HalfCheetah-v3 run -- confirm the two match before use.
# Alternative (disabled): rebuild the algorithm directly from the checkpoint.
# from ray.rllib.algorithms.algorithm import Algorithm
# algo = Algorithm.from_checkpoint(best_checkpoint._local_path)
# algo

algo = config.build()

2023-02-13 01:48:35,930	INFO worker.py:1538 -- Started a local Ray instance.
[2m[33m(raylet)[0m [2023-02-13 01:48:45,825 E 8496 8547] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-48-33_628035_8380 is over 95% full, available space: 106409984; capacity: 31845081088. Object creation will fail if spilling is required.
2023-02-13 01:48:49,214	INFO es.py:401 -- Creating actors.
2023-02-13 01:48:49,400	INFO trainable.py:172 -- Trainable.setup took 16.959 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(Worker pid=8758)[0m 2023-02-13 01:48:54,273	INFO policy.py:1147 -- Policy (worker=local) running on CPU.
[2m[36m(Worker pid=8758)[0m 2023-02-13 01:48:54,273	INFO torch_policy.py:184 -- Found 0 visible cuda devices.
[2m[36m(Worker pid=8757)[0m 2023-02-13 01:48:54,967	INFO policy.py:1147 -- Policy (worker=local) running on CPU.
[2m[36m(Worker pid=8757)[0m 2023-02-13 01:48:54,967	

In [5]:
# Load the trained weights from a saved checkpoint into `algo`.
# The checkpoint location is derived from DARM_MUJOCO_PATH (falling back to
# the original local checkout) rather than a single hard-coded absolute path.
# Alternative (disabled): algo.restore(best_checkpoint._local_path)
import os

darm_root = os.environ.get("DARM_MUJOCO_PATH", "/home/daniel/DARM/darm_mujoco")
checkpoint_dir = os.path.join(
    darm_root,
    "darm_training",
    "results",
    "HalfCheetah_local",
    "ES_HalfCheetah-v3_14e1e_00000_0_2023-02-13_01-19-29",
    "checkpoint_000003",
)
algo.restore(checkpoint_dir)

2023-02-13 01:49:44,131	INFO filter_manager.py:34 -- Synchronizing filters ...
2023-02-13 01:49:44,153	INFO filter_manager.py:55 -- Updating remote filters ...
2023-02-13 01:49:44,171	INFO trainable.py:790 -- Restored on 192.168.84.35 from checkpoint: /home/daniel/DARM/darm_mujoco/darm_training/results/HalfCheetah_local/ES_HalfCheetah-v3_14e1e_00000_0_2023-02-13_01-19-29/checkpoint_000003
2023-02-13 01:49:44,173	INFO trainable.py:799 -- Current state after restoring: {'_iteration': 3, '_timesteps_total': 3006000, '_time_total': 948.164491891861, '_episodes_total': None}
[2m[33m(raylet)[0m [2023-02-13 01:49:45,916 E 8496 8547] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-48-33_628035_8380 is over 95% full, available space: 108113920; capacity: 31845081088. Object creation will fail if spilling is required.


In [10]:
import gym

# Roll out a single rendered episode with the restored policy and report the
# accumulated reward.
env = gym.make("HalfCheetah-v3")

episode_reward = 0
done = False
try:
    obs = env.reset()
    while not done:
        env.render()
        action = algo.compute_single_action(obs)
        obs, rew, done, info = env.step(action)
        episode_reward += rew
finally:
    # Always release the environment (and its render window), even if the
    # rollout raises part-way through.
    env.close()

# Must be the last expression of the cell so the notebook displays it.
episode_reward

Creating window glfw


[2m[33m(raylet)[0m [2023-02-13 01:51:16,047 E 8496 8547] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-48-33_628035_8380 is over 95% full, available space: 107986944; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-13 01:51:26,062 E 8496 8547] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-48-33_628035_8380 is over 95% full, available space: 107982848; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-13 01:51:36,082 E 8496 8547] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-48-33_628035_8380 is over 95% full, available space: 107950080; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-13 01:51:46,099 E 8496 8547] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-48-33_628035_8380 is over 95% full, available space: 107941888; capacity: 

[2m[33m(raylet)[0m [2023-02-13 01:51:06,038 E 8496 8547] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-13_01-48-33_628035_8380 is over 95% full, available space: 90435584; capacity: 31845081088. Object creation will fail if spilling is required.
