## Installation and Imports

In [None]:
!pip install ray[rllib] torch
!pip install tensorflow_probability
!pip install wandb

In [None]:
# Check if GCC is installed.
# Prints the compiler version when GCC is on the PATH; if the shell reports
# that the command is missing, run the GCC install cell that follows.
# NOTE(review): presumably needed to build native extensions for the MuJoCo
# bindings - confirm.
!gcc --version

In [None]:
# Install GCC if absent
!sudo apt update
!sudo apt install build-essential -y

In [None]:
# # Optional: set up MuJoCo for gym (disabled; uncomment and run this cell if the HalfCheetah-v3 check below fails)
# !apt-get install -y \
#     libgl1-mesa-dev \
#     libgl1-mesa-glx \
#     libglew-dev \
#     libosmesa6-dev \
#     software-properties-common

# !apt-get install -y patchelf

# !pip install gym

# !pip install free-mujoco-py

# import mujoco_py
# import gym

In [1]:
import ray
from ray.rllib.algorithms.sac import SACConfig
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback

import gym

In [2]:
# Test that mujoco for gym is available: building the environment fails here
# if the MuJoCo-backed HalfCheetah task cannot be created.
mujoco_check_env = gym.make("HalfCheetah-v3")

# Close the throwaway environment so it is not kept alive for the rest of the
# kernel session; training is given the environment id, not this instance.
mujoco_check_env.close()

# Last expression of the cell: show the environment repr as confirmation.
mujoco_check_env

<TimeLimit<HalfCheetahEnv<HalfCheetah-v3>>>

## Configure and Run

In [3]:
# SAC on HalfCheetah-v3. Every builder call returns a config object, so the
# sections are applied one statement at a time instead of as a single chain.
config = SACConfig()

# Environment: the gym id to train on, with action normalization enabled.
config = config.environment(env="HalfCheetah-v3", normalize_actions=True)

# Learning hyperparameters: two 256-unit ReLU layers for both the Q-network and
# the policy network, and the same learning rate for all three optimizers.
config = config.training(
    q_model_config=dict(fcnet_activation="relu", fcnet_hiddens=[256, 256]),
    policy_model_config=dict(fcnet_activation="relu", fcnet_hiddens=[256, 256]),
    tau=0.005,
    target_entropy="auto",
    n_step=1,  # length of the n-step return used for Q targets (1 = one-step TD)
    train_batch_size=256,
    target_network_update_freq=1,
    replay_buffer_config=dict(type="MultiAgentPrioritizedReplayBuffer"),
    num_steps_sampled_before_learning_starts=10_000,
    optimization_config=dict(
        actor_learning_rate=0.0003,
        critic_learning_rate=0.0003,
        entropy_learning_rate=0.0003,
    ),
    clip_actions=False,
)

# Sampling: three rollout workers, each returning one-step fragments.
config = config.rollouts(num_rollout_workers=3, rollout_fragment_length=1)

# CPU-only training.
config = config.resources(num_gpus=0)

# Evaluate once every 100 training iterations (an iteration samples at least
# 1000 timesteps, see the reporting settings that follow).
config = config.evaluation(evaluation_interval=100)

# Reporting: minimum timesteps sampled per iteration, and the number of recent
# episodes used to smooth the reported episode metrics.
config = config.reporting(
    min_sample_timesteps_per_iteration=1000,
    metrics_num_episodes_for_smoothing=5,
)

config = config.framework(framework="torch")

In [4]:
# Keyword arguments for the Weights & Biases logger; the run cell unpacks this
# mapping into WandbLoggerCallback. The "config" entry is a hand-copied summary
# of the SAC settings, so keep it in sync whenever those settings change.
# Other wandb options left unset for now: job_type, monitor_gym.
wandb_init = {
    "save_code": True,
    "config": {
        # Task
        "env": "HalfCheetah-v3",
        # Optimisation
        "actor_learning_rate": 0.0003,
        "critic_learning_rate": 0.0003,
        "entropy_learning_rate": 0.0003,
        "framework": "torch",
        # Scaling and reporting
        "num_rollout_workers": 3,
        "num_gpu": 0,
        "metrics_num_episodes_for_smoothing": 5,
    },
    "tags": ["local"],
    "notes": "Test to inspect scaling on Vast.ai",
    "name": "HalfCheetah_local",
}

In [5]:
# Run SAC through Ray Tune, logging to Weights & Biases, and keep the returned
# results object for later inspection.
#
# SECURITY: no W&B API key is passed here on purpose. Credentials must come
# from the environment (the WANDB_API_KEY variable or a prior `wandb login`),
# never from notebook source. A key that was previously committed in this cell
# should be treated as leaked and revoked.
tuner = tune.Tuner(
    "SAC",
    run_config=air.RunConfig(
        name="HalfCheetah_local",
        # The trial stops as soon as either criterion is met.
        stop={"training_iteration": 3_000, "episode_reward_mean": 150},
        checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True),
        callbacks=[
            WandbLoggerCallback(
                project="HalfCheetah",
                save_checkpoints=True,
                **wandb_init,
            )
        ],
        local_dir="./results",
    ),
    param_space=config,
)

results = tuner.fit()

2023-02-10 13:05:53,494	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-02-10 13:29:44
Running for:,00:23:48.89
Memory:,6.8/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_HalfCheetah-v3_45677_00000,RUNNING,192.168.152.36:14767,33,1361.66,33066,-215.578,-167.92,-263.956,1000


[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(SAC pid=14767)[0m 2023-02-10 13:05:59,642	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m [2023-02-10 13:06:03,390 E 14506 14551] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_13-05-51_350412_13852 is over 95% full, available space: 1204518912; capacity: 31845081088. Object creation will fail if spilling is required.


[2m[36m(SAC pid=14767)[0m 2023-02-10 13:06:10,815	INFO trainable.py:172 -- Trainable.setup took 11.177 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[33m(raylet)[0m [2023-02-10 13:06:13,402 E 14506 14551] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_13-05-51_350412_13852 is over 95% full, available space: 1204424704; capacity: 31845081088. Object creation will fail if spilling is required.


Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_HalfCheetah-v3_45677_00000,33066,"{'num_env_steps_sampled': 33066, 'num_env_steps_trained': 1968384, 'num_agent_steps_sampled': 33066, 'num_agent_steps_trained': 1968384, 'last_target_update_ts': 33066, 'num_target_updates': 7689}",{},2023-02-10_13-28-53,False,1000,{},-167.92,-215.578,-263.956,3,33,4b3554eec46b41eeb5d922821f573f3e,Daniel,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 7.748189926147461, 'actor_loss': -28.348724365234375, 'critic_loss': 0.0571567639708519, 'alpha_loss': -17.404212951660156, 'alpha_value': 0.105797395, 'log_alpha_value': -2.2462294, 'target_entropy': -6.0, 'policy_t': 0.03609086945652962, 'mean_q': 27.962879180908203, 'max_q': 40.63548278808594, 'min_q': 18.630765914916992}, 'td_error': array([1.3438015e+00, 7.2225761e-01, 6.5246391e-01, 1.3963299e+00,  4.7867012e-01, 1.8056650e+00, 4.8855591e-01, 8.7155342e-01,  5.4131222e-01, 3.4649467e-01, 1.3100796e+00, 2.4633980e-01,  1.0608139e+00, 4.6612740e-01, 2.5631332e+00, 1.2575035e+00,  6.2781525e-01, 6.4079666e-01, 2.9129791e+00, 1.8675900e+00,  1.0474615e+00, 3.4719467e-01, 1.5973949e+00, 1.3353758e+00,  3.6047268e-01, 8.5598087e-01, 1.7818727e+00, 5.7840347e-03,  2.5633602e+00, 2.2904625e+00, 1.1524830e+00, 3.1922340e-01,  1.3108168e+00, 1.3822460e+00, 1.4068127e+00, 2.6141644e-01,  1.1472569e+00, 3.0969048e+00, 2.9394455e+00, 2.3422575e+00,  1.2751608e+00, 8.7643051e-01, 6.5191746e-01, 1.1655207e+00,  2.2592678e+00, 3.5607529e-01, 2.7440681e+00, 5.3690624e-01,  3.2188320e-01, 6.7167473e-01, 8.1709671e-01, 5.7491589e-01,  3.5024929e-01, 2.1094236e+00, 4.0640068e-01, 1.9417572e+00,  1.4235373e+00, 6.6273460e+00, 1.2791414e+00, 1.8778706e-01,  3.7650108e-01, 1.5470619e+00, 8.8894939e-01, 1.2622957e+00,  1.4059992e+00, 4.4786835e+00, 3.0064392e-01, 4.6643753e+00,  8.2291031e-01, 8.2259083e-01, 5.3247452e-01, 6.9318581e-01,  1.9925117e-01, 1.8472509e+00, 1.4374723e+00, 2.5366497e-01,  5.5396366e-01, 3.6727638e+00, 5.5188751e-01, 
1.8304482e+00,  7.7800083e-01, 7.0011711e-01, 2.5716000e+00, 4.0670872e-01,  1.3090382e+00, 1.2051811e+00, 3.5901833e-01, 8.6568260e-01,  2.8754244e+00, 9.3017006e-01, 2.5213890e+00, 1.5350399e+00,  1.7390957e+00, 6.7485809e-01, 1.1417923e+00, 1.4657469e+00,  1.3714056e+00, 3.4017944e-01, 3.8861942e-01, 4.9283409e-01,  2.5856667e+00, 7.3075104e-01, 1.7031641e+00, 1.1696568e+00,  4.5531273e-02, 2.9010105e-01, 7.5473881e-01, 5.5175400e-01,  4.5319939e-01, 9.0274811e-01, 1.4756994e+00, 6.5055084e-01,  2.4201202e-01, 1.4595299e+00, 3.3723164e-01, 1.6024990e+00,  1.9642448e+00, 1.7673407e+00, 1.7865944e-01, 1.3689117e+00,  1.7087860e+00, 1.1464615e+00, 1.0831547e+00, 5.8963966e-01,  1.3120794e+00, 5.2829361e-01, 9.3073273e-01, 9.3682480e-01,  1.1376324e+00, 1.3203411e+00, 1.3815298e+00, 2.7472620e+00,  4.1015625e-02, 1.3482714e+00, 2.1516657e+00, 3.8547230e-01,  1.8757095e+00, 1.6213951e+00, 2.0971298e+00, 3.2355595e-01,  4.0776539e-01, 2.6312170e+00, 9.6689129e-01, 5.7242775e-01,  6.0077953e-01, 2.8788586e+00, 9.8828125e-01, 3.8749695e-01,  3.4899616e-01, 1.7398262e-01, 1.3934631e+00, 1.5761843e+00,  1.9233875e+00, 6.5947151e-01, 1.1539774e+00, 7.4207115e-01,  2.1392918e-01, 3.7877083e-02, 1.7075329e+00, 5.9697342e-01,  1.1136246e+00, 4.8617554e-01, 4.0090370e-01, 1.7907858e+00,  1.2907381e+00, 4.7705841e-01, 7.9442024e-02, 2.9774666e-01,  1.0693092e+00, 3.4962654e-01, 7.7527809e-01, 7.2861958e-01,  1.0401735e+00, 1.3869801e+00, 9.8899841e-01, 3.7216501e+00,  1.7281551e+00, 7.2134781e-01, 2.5292969e-01, 1.7811966e+00,  4.1616726e-01, 8.6394119e-01, 1.9550228e-01, 6.5260887e-01,  1.1709719e+00, 4.3296337e-01, 1.5446453e+00, 1.2053957e+00,  1.6001425e+00, 1.1142778e+00, 1.3936901e+00, 7.2607422e-01,  1.0596695e+00, 1.0363798e+00, 2.7054327e+01, 1.6145039e-01,  4.0457535e-01, 7.7237701e-01, 8.2118034e-01, 1.3983669e+00,  2.8749599e+00, 2.6980381e+00, 1.5176229e+00, 1.2906084e+00,  3.3011627e-01, 3.9676075e+00, 3.4671507e+00, 5.9925079e-01,  6.8101883e-01, 5.9643269e-01, 
6.3999367e-01, 1.1433306e+00,  2.3079777e-01, 1.4596720e+00, 1.3927135e+00, 1.2096643e+00,  7.4028397e-01, 1.5208483e+00, 8.5922813e-01, 4.4540787e-01,  4.5723152e-01, 1.2031927e+00, 8.3436584e-01, 2.5109596e+00,  4.7055244e-01, 2.0647030e+00, 2.6177816e+00, 6.3316822e-01,  1.8749619e-01, 6.8091202e-01, 6.7880821e-01, 1.8121748e+00,  3.7192249e-01, 6.2009048e-01, 2.5033092e+00, 2.0851898e-01,  4.0450096e-01, 8.4938908e-01, 4.4171619e-01, 1.4198303e-01,  3.9256382e-01, 6.5105820e-01, 6.3901806e-01, 2.9276619e+01,  9.6225357e-01, 9.9129772e-01, 1.3503380e+00, 2.9096699e-01,  6.9911480e-01, 7.7093887e-01, 2.1277142e-01, 1.1472692e+00,  4.1746140e-01, 6.3711739e-01, 9.8512173e-01, 2.7659817e+00],  dtype=float32), 'mean_td_error': 1.353323221206665, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 256.0, 'num_grad_updates_lifetime': 7689.0, 'diff_num_grad_updates_vs_sampler_policy': 7688.0}}, 'num_env_steps_sampled': 33066, 'num_env_steps_trained': 1968384, 'num_agent_steps_sampled': 33066, 'num_agent_steps_trained': 1968384, 'last_target_update_ts': 33066, 'num_target_updates': 7689}",33,192.168.152.36,33066,1968384,33066,1002,1968384,85504,0,3,0,0,85504,"{'cpu_util_percent': 51.79411764705881, 'ram_util_percent': 92.46705882352937}",14767,{},{},{},"{'mean_raw_obs_processing_ms': 1.357153863304131, 'mean_inference_ms': 2.490057054053702, 'mean_action_processing_ms': 0.23928675805918923, 'mean_env_wait_ms': 0.29093273375258427, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -167.91988759326725, 'episode_reward_min': -263.95626261651586, 'episode_reward_mean': -215.57811427532747, 'episode_len_mean': 1000.0, 'episode_media': {}, 'episodes_this_iter': 3, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-215.8739431473014, -218.1805715779573, -211.95990644159554, -263.95626261651586, -167.91988759326725], 'episode_lengths': [1000, 1000, 1000, 1000, 1000]}, 'sampler_perf': 
{'mean_raw_obs_processing_ms': 1.357153863304131, 'mean_inference_ms': 2.490057054053702, 'mean_action_processing_ms': 0.23928675805918923, 'mean_env_wait_ms': 0.29093273375258427, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",1361.66,61.4308,1361.66,"{'training_iteration_time_ms': 153.919, 'load_time_ms': 0.294, 'load_throughput': 870766.218, 'learn_time_ms': 24.925, 'learn_throughput': 10270.825, 'synch_weights_time_ms': 6.021}",1676032133,0,33066,33,45677_00000,11.1842


[2m[33m(raylet)[0m [2023-02-10 13:06:23,409 E 14506 14551] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_13-05-51_350412_13852 is over 95% full, available space: 1204297728; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-10 13:06:33,414 E 14506 14551] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_13-05-51_350412_13852 is over 95% full, available space: 1204154368; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-10 13:06:43,420 E 14506 14551] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_13-05-51_350412_13852 is over 95% full, available space: 1204060160; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-10 13:06:53,426 E 14506 14551] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-10_13-05-51_350412_13852 is over 95% full, available space: 120391