## Installation and Imports

In [1]:
import ray
from ray.rllib.algorithms.sac import SACConfig
from ray.tune.registry import register_env
from ray.tune.logger import pretty_print

from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback

import gym

## Configure and Run

In [2]:
# Soft Actor-Critic configuration for HalfCheetah-v3, built one section at a
# time; each setter's return value is fed into the next call, exactly as in a
# single fluent chain.
# NOTE(review): no seed is configured in this cell, so runs are not expected to
# be repeatable.
config = SACConfig()

# --- Environment -----------------------------------------------------------
config = config.environment(env="HalfCheetah-v3", normalize_actions=True)

# --- SAC training hyperparameters ------------------------------------------
config = config.training(
    # Q-network and policy network use the same architecture: two hidden
    # layers of 256 units with ReLU activations.
    q_model_config=dict(fcnet_activation="relu", fcnet_hiddens=[256, 256]),
    policy_model_config=dict(fcnet_activation="relu", fcnet_hiddens=[256, 256]),
    tau=0.005,
    target_entropy="auto",
    # N-step return horizon used for the Q-learning target (1 = ordinary
    # one-step TD target). This is not a count of SGD steps.
    n_step=1,
    train_batch_size=256,
    target_network_update_freq=1,
    replay_buffer_config=dict(type="MultiAgentPrioritizedReplayBuffer"),
    # Only collect experience (no gradient updates) for the first 10k steps.
    num_steps_sampled_before_learning_starts=10000,
    # Same learning rate for actor, critic and entropy coefficient.
    optimization_config=dict(
        actor_learning_rate=0.0003,
        critic_learning_rate=0.0003,
        entropy_learning_rate=0.0003,
    ),
    clip_actions=False,
)

# --- Sampling ----------------------------------------------------------------
# Three rollout workers, each returning one-step fragments.
config = config.rollouts(num_rollout_workers=3, rollout_fragment_length=1)

# --- Resources -----------------------------------------------------------------
# CPU-only training.
config = config.resources(num_gpus=0)

# --- Evaluation and reporting --------------------------------------------------
# Evaluate once every 100 training iterations; with the reporting setting
# below, each iteration accumulates at least 1000 sampled timesteps.
config = config.evaluation(evaluation_interval=100)
config = config.reporting(
    min_sample_timesteps_per_iteration=1000,
    metrics_num_episodes_for_smoothing=5,
)

# --- Deep-learning framework -----------------------------------------------------
config = config.framework(framework="torch")

In [3]:
# Extra keyword arguments for the Weights & Biases run. The Tuner cell unpacks
# this dict into WandbLoggerCallback(**wandb_init); per the Ray docs those
# kwargs are forwarded to wandb.init().
wandb_init = dict(
    save_code=True,
    # Hyperparameter summary attached to the W&B run. The values are copied by
    # hand from the SACConfig cell and must be kept in sync with it. Keys use
    # the exact RLlib setting names (e.g. `num_gpus`, not `num_gpu`) so runs
    # can be filtered and compared by the same names used in the config.
    config={
        # Environment
        "env": "HalfCheetah-v3",

        # Optimisation
        "actor_learning_rate": 0.0003,
        "critic_learning_rate": 0.0003,
        "entropy_learning_rate": 0.0003,
        "framework": "torch",

        # Resources / reporting
        "num_rollout_workers": 3,
        "num_gpus": 0,
        "metrics_num_episodes_for_smoothing": 5,
    },
    tags=["local"],
    notes="Test to inspect scaling on Vast.ai",
    name="HalfCheetah_local",
    # Not set here (left at their wandb defaults): job_type, monitor_gym.
)

In [4]:
# Save a checkpoint when the trial finishes.
checkpointing = air.CheckpointConfig(checkpoint_at_end=True)

# Stream trial results (and checkpoints) to the "HalfCheetah" W&B project,
# passing the run metadata from `wandb_init` through as extra keyword arguments.
wandb_logger = WandbLoggerCallback(
    project="HalfCheetah",
    save_checkpoints=True,
    **wandb_init,
)

# Experiment-level settings. The stop criteria are "whichever is reached
# first": the trial ends after 3000 training iterations or as soon as the
# mean episode reward reaches 150.
run_settings = air.RunConfig(
    name="HalfCheetah_vast_ai",
    local_dir="./results",
    stop={"training_iteration": 3_000, "episode_reward_mean": 150},
    checkpoint_config=checkpointing,
    callbacks=[wandb_logger],
)

# Train RLlib's registered "SAC" algorithm with the config built above.
tuner = tune.Tuner("SAC", param_space=config, run_config=run_settings)

# Blocks until the experiment finishes.
results = tuner.fit()

2023-02-09 23:25:13,054	INFO worker.py:1538 -- Started a local Ray instance.
2023-02-09 23:25:14,515	INFO wandb.py:250 -- Already logged into W&B.


0,1
Current time:,2023-02-09 23:27:57
Running for:,00:02:42.49
Memory:,4.6/7.5 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
SAC_HalfCheetah-v3_9fb81_00000,RUNNING,192.168.152.36:5255,11,93.1622,11022,-292.981,-233.998,-340.9,1000


[34m[1mwandb[0m: Currently logged in as: [33mdanieladejumo[0m. Use [1m`wandb login --relogin`[0m to force relogin
[2m[36m(SAC pid=5255)[0m 2023-02-09 23:25:18,895	INFO algorithm.py:501 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m [2023-02-09 23:25:22,946 E 4994 5039] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_23-25-09_937334_4881 is over 95% full, available space: 1206038528; capacity: 31845081088. Object creation will fail if spilling is required.


Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
SAC_HalfCheetah-v3_9fb81_00000,11022,"{'num_env_steps_sampled': 11022, 'num_env_steps_trained': 87296, 'num_agent_steps_sampled': 11022, 'num_agent_steps_trained': 87296, 'last_target_update_ts': 11022, 'num_target_updates': 341}",{},2023-02-09_23-27-01,False,1000,{},-233.998,-292.981,-340.9,0,9,359c2c77b76b4dd6977ec99c57b8d411,Daniel,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 10.07202434539795, 'actor_loss': -9.025458335876465, 'critic_loss': 0.2415330410003662, 'alpha_loss': -1.027607798576355, 'alpha_value': 0.90300614, 'log_alpha_value': -0.102025956, 'target_entropy': -6.0, 'policy_t': 0.04645523428916931, 'mean_q': 5.41398286819458, 'max_q': 11.071789741516113, 'min_q': 1.4648659229278564}, 'td_error': array([2.9509466 , 0.9988861 , 1.9370294 , 1.1591816 , 1.4729459 ,  1.8506498 , 2.885347 , 1.5431514 , 0.45359862, 1.6352813 ,  1.067107 , 2.415809 , 2.4009805 , 0.66712904, 2.2120981 ,  1.9833089 , 2.4560628 , 1.2805784 , 1.3984807 , 0.40885758,  1.0757196 , 0.13333607, 0.14555717, 1.3062484 , 2.4643962 ,  2.0643098 , 0.44860196, 0.3981986 , 1.4452876 , 1.2698514 ,  3.1402712 , 2.571973 , 3.8710046 , 1.4677339 , 2.6913037 ,  0.39448595, 1.9173098 , 1.4558547 , 2.3872962 , 0.60955215,  3.5681305 , 2.483955 , 1.8751287 , 1.1586635 , 1.5558608 ,  4.234622 , 0.7035558 , 2.7190063 , 1.8820316 , 1.8628948 ,  1.6156793 , 2.031303 , 0.28688264, 0.8564153 , 0.23660326,  3.1828823 , 1.8692517 , 5.3230243 , 2.118788 , 0.6094477 ,  0.76543546, 0.6576369 , 2.4151714 , 2.1163783 , 3.802826 ,  3.0842838 , 4.595171 , 1.2762183 , 1.8338513 , 2.7085688 ,  2.1769552 , 1.8450398 , 1.0973964 , 2.3642838 , 2.8488984 ,  0.542676 , 1.2971399 , 2.0986705 , 1.9930696 , 3.6106372 ,  0.75904226, 0.6446961 , 1.3395989 , 1.0964713 , 1.1312335 ,  1.51985 , 2.3886333 , 1.1496575 , 1.2212092 , 4.634345 ,  1.1431317 , 3.5023205 , 2.7518466 , 0.64675653, 4.913612 ,  1.1415017 , 3.6226726 , 0.24486136, 0.7408836 , 1.0706868 ,  1.2121747 , 1.7321489 
, 0.7289655 , 1.4608619 , 2.9639819 ,  1.6174421 , 2.5748954 , 1.884774 , 0.22317839, 2.438346 ,  0.2816062 , 0.6420634 , 3.0350065 , 1.3908501 , 3.9100094 ,  1.1554964 , 0.4355669 , 1.9591995 , 1.63557 , 3.903087 ,  2.6690068 , 0.9179816 , 2.599912 , 0.8063154 , 2.5694776 ,  1.9564964 , 1.109189 , 2.7190042 , 0.5401666 , 1.380898 ,  2.7454548 , 0.18205953, 2.0934923 , 1.0248401 , 1.4094403 ,  2.1985838 , 2.6140718 , 2.2516499 , 1.8336132 , 2.6929183 ,  1.6135666 , 1.7486076 , 1.9121552 , 1.2403853 , 3.5458894 ,  0.7683978 , 0.9451697 , 1.8377435 , 0.5707805 , 3.3786104 ,  3.225205 , 0.86801505, 1.5875199 , 0.65161586, 2.4181483 ,  2.0135152 , 3.494512 , 0.25844884, 1.822489 , 0.53197217,  1.8171518 , 1.510526 , 0.89664865, 1.8043802 , 1.7567089 ,  4.24862 , 2.1927009 , 2.4027464 , 1.2664046 , 1.6513941 ,  1.9291918 , 2.9750626 , 3.5492785 , 2.7101328 , 2.033898 ,  1.0999234 , 0.8117268 , 1.8664973 , 2.066683 , 1.4051342 ,  0.74687886, 0.11155272, 1.9677541 , 1.3607306 , 1.5288043 ,  0.25973332, 0.7631824 , 1.7051808 , 0.802094 , 0.83978724,  1.535423 , 2.1544096 , 0.9650657 , 1.6017727 , 0.33154702,  1.401068 , 4.1225753 , 0.9291835 , 0.5205152 , 1.1508821 ,  2.6086388 , 2.1520872 , 0.44897294, 2.5097456 , 1.8100348 ,  0.8794029 , 2.365128 , 1.4229324 , 1.5331357 , 0.20331597,  0.5721834 , 0.8165846 , 0.55181336, 2.0156095 , 1.1546798 ,  0.994534 , 2.2371376 , 1.3520977 , 1.440165 , 1.3498456 ,  0.90504956, 1.0942979 , 4.1667514 , 2.7432008 , 0.7208247 ,  0.2540052 , 3.2562547 , 0.06819344, 1.8134959 , 2.0130293 ,  0.25110054, 1.8268158 , 0.9371655 , 2.44393 , 1.570142 ,  0.9074135 , 1.6927862 , 1.706352 , 1.1398256 , 2.4488614 ,  3.7668762 , 1.2291183 , 0.64544713, 1.6840007 , 0.6163583 ,  0.633306 , 1.6026213 , 2.187861 , 0.80523443, 1.2110784 ,  2.1133633 , 2.4793444 , 4.3143826 , 0.8647673 , 2.7738688 ,  0.4736588 ], dtype=float32), 'mean_td_error': 1.7262011766433716, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 256.0, 
'num_grad_updates_lifetime': 341.0, 'diff_num_grad_updates_vs_sampler_policy': 340.0}}, 'num_env_steps_sampled': 11022, 'num_env_steps_trained': 87296, 'num_agent_steps_sampled': 11022, 'num_agent_steps_trained': 87296, 'last_target_update_ts': 11022, 'num_target_updates': 341}",11,192.168.152.36,11022,87296,11022,1002,87296,85504,0,3,0,0,85504,"{'cpu_util_percent': 42.11200000000001, 'ram_util_percent': 60.25200000000002}",5255,{},{},{},"{'mean_raw_obs_processing_ms': 1.4851522110455198, 'mean_inference_ms': 2.506559229549628, 'mean_action_processing_ms': 0.2388331049747872, 'mean_env_wait_ms': 0.29935671098171135, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -233.99772775695823, 'episode_reward_min': -340.8995194641167, 'episode_reward_mean': -292.9808384674325, 'episode_len_mean': 1000.0, 'episode_media': {}, 'episodes_this_iter': 0, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-321.0240325291854, -297.97974884848054, -271.00316373842156, -340.8995194641167, -233.99772775695823], 'episode_lengths': [1000, 1000, 1000, 1000, 1000]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.4851522110455198, 'mean_inference_ms': 2.506559229549628, 'mean_action_processing_ms': 0.2388331049747872, 'mean_env_wait_ms': 0.29935671098171135, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",93.1622,54.4502,93.1622,"{'training_iteration_time_ms': 170.46, 'load_time_ms': 0.284, 'load_throughput': 902455.727, 'learn_time_ms': 26.165, 'learn_throughput': 9784.193, 'synch_weights_time_ms': 5.59}",1675981621,0,11022,11,9fb81_00000,9.49957


[2m[33m(raylet)[0m [2023-02-09 23:25:32,970 E 4994 5039] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_23-25-09_937334_4881 is over 95% full, available space: 1200185344; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 23:25:42,975 E 4994 5039] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_23-25-09_937334_4881 is over 95% full, available space: 1202999296; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 23:25:52,982 E 4994 5039] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_23-25-09_937334_4881 is over 95% full, available space: 1201119232; capacity: 31845081088. Object creation will fail if spilling is required.
[2m[33m(raylet)[0m [2023-02-09 23:26:02,992 E 4994 5039] (raylet) file_system_monitor.cc:105: /tmp/ray/session_2023-02-09_23-25-09_937334_4881 is over 95% full, available space: 1200787456; capaci