In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [9]:
import argparse
import datetime
import os

import gymnasium as gym

import ray
from ray import tune


def create_env(config):
    """Environment factory: build one ``SlidingPuzzle-v0`` gymnasium env.

    Args:
        config: Mapping of keyword arguments; every entry is forwarded
            unchanged to ``gym.make``.

    Returns:
        Whatever ``gym.make("SlidingPuzzle-v0", **config)`` returns.
    """
    # Imported inside the factory instead of at the top of the notebook.
    # NOTE(review): presumably the import registers "SlidingPuzzle-v0" with
    # gymnasium in whichever process calls this function -- confirm.
    import sliding_puzzles

    env = gym.make("SlidingPuzzle-v0", **config)
    return env


# Expose the factory to Ray under the name "SlidingPuzzle".
tune.registry.register_env("SlidingPuzzle", create_env)

In [10]:
# Experiment settings, kept in one flat dict that later cells read by key.
configs = dict(
    # --- run / training ---
    # Timestamp taken when this cell executes, formatted as YYYYmmdd-HHMMSS.
    run_id=f"{datetime.datetime.now():%Y%m%d-%H%M%S}",
    # policy="MlpPolicy",
    algorithm="PPO",
    total_timesteps=1_000_000,
    n_envs=32,  # equals n_workers * n_envs_per_worker (8 * 4)
    n_workers=8,
    n_envs_per_worker=4,
    n_steps=1000,
    seed=42,
    # --- environment (the "env_*" entries feed the env_config of the training cell) ---
    env_w=2,
    env_h=2,
    env_shuffle_steps=5,
    env_sparse_rewards=True,
    env_sparse_mode="win",
    env_win_reward=10,
    env_variation="normalized",
    env_image_folder="imgs/single",
    env_image_background=None,
)

In [11]:
# Launch the training run through Ray Tune. Every tunable value comes from
# `configs`; the returned analysis object is used afterwards to look up the
# best trial and checkpoint.
analysis = tune.run(
    configs["algorithm"],
    name=configs["run_id"],
    config={
        # system settings
        "num_gpus": 1,  # NOTE(review): hard-coded; assumes one GPU is available
        "num_workers": configs["n_workers"],
        "num_envs_per_worker": configs["n_envs_per_worker"],
        "log_level": "INFO",
        "framework": "torch",
        # Forward the configured seed; it was defined in `configs` but never
        # handed to the trainer, leaving the run unseeded.
        "seed": configs["seed"],
        # RL setup
        "env": "SlidingPuzzle",  # name registered via tune.registry.register_env
        "env_config": {
            # These entries become keyword arguments of the env factory.
            "w": configs["env_w"],
            "h": configs["env_h"],
            "shuffle_steps": configs["env_shuffle_steps"],
            "sparse_rewards": configs["env_sparse_rewards"],
            "sparse_mode": configs["env_sparse_mode"],
            "win_reward": configs["env_win_reward"],
            "variation": configs["env_variation"],
            "image_folder": configs["env_image_folder"],
            "background_color_rgb": configs["env_image_background"],
            "render_mode": "state",
        },
        # Optional overrides, currently disabled:
        # "model": {
        #     "vf_share_layers": True,
        #     "fcnet_hiddens": [512],
        # },
        # "rollout_fragment_length": 500,
        # "train_batch_size": 12000,
    },
    stop={
        # Stop once this many timesteps have been reported.
        "timesteps_total": configs["total_timesteps"],
        # "time_total_s": 14400, # 4h
    },
    checkpoint_freq=100,  # checkpoint every 100 training iterations
    checkpoint_at_end=True,
    local_dir=os.path.abspath("./ray_results"),
    # restore=os.path.join(os.path.abspath("./ray_results"), "PPO_selfplay_1/PPO_Soccer_ID/checkpoint_00X/checkpoint-X",
)

2024-02-14 11:23:44,450	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949




0,1
Current time:,2024-02-14 11:41:50
Running for:,00:18:05.95
Memory:,16.1/31.2 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SlidingPuzzle_a8c17_00000,TERMINATED,192.168.0.24:38425,250,1070.49,1000000,10,10,10,5.47541


[36m(pid=38425)[0m   from pkg_resources import resource_stream, resource_exists
[36m(pid=38425)[0m Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
[36m(pid=38425)[0m   declare_namespace(pkg)
[36m(pid=38425)[0m   _nlv = LooseVersion(_np_version)
[36m(pid=38425)[0m   np_version_under1p17 = _nlv < LooseVersion("1.17")
[36m(pid=38425)[0m   np_version_under1p18 = _nlv < LooseVersion("1.18")
[36m(pid=38425)[0m   _np_version_under1p19 = _nlv < LooseVersion("1.19")
[36m(pid=38425)[0m   _np_version_under1p20 = _nlv < LooseVersion("1.20")
[36m(pid=38425)[0m   other = LooseVersion(other)
[36m(pid=38425)[0m   if LooseVersion(__version__) >= LooseVersion("1.17.0"):
[36m(pid=38425)[0m   if LooseVersion(__version__) >= LooseVersion("1.17.0"):
[36m(pid=38425)[0m   if (distutils.version.LooseVersion(tf.__version__) <


Trial name,agent_timesteps_total,connector_metrics,counters,custom_metrics,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,info,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_sampled_throughput_per_sec,num_env_steps_trained,num_env_steps_trained_this_iter,num_env_steps_trained_throughput_per_sec,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,timers
PPO_SlidingPuzzle_a8c17_00000,1000000,"{'ObsPreprocessorConnector_ms': 0.004010741176500998, 'StateBufferConnector_ms': 0.006714577231902242, 'ViewRequirementAgentConnector_ms': 0.08132985380829358}","{'num_env_steps_sampled': 1000000, 'num_env_steps_trained': 1000000, 'num_agent_steps_sampled': 1000000, 'num_agent_steps_trained': 1000000}",{},5.47541,{},10,10,10,732,"{'learner': {'default_policy': {'custom_metrics': {}, 'learner_stats': {'cur_kl_coeff': 0.15622361248072889, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 9.830374779239778, 'policy_loss': -0.007189364563072881, 'vf_loss': 9.836191519870553, 'vf_explained_var': -0.014089324781971594, 'kl': 0.008785818402980943, 'entropy': 0.2809671331958104, 'entropy_coeff': 0.0}, 'model': {}, 'num_grad_updates_lifetime': 232035.5, 'diff_num_grad_updates_vs_sampler_policy': 464.5}}, 'num_env_steps_sampled': 1000000, 'num_env_steps_trained': 1000000, 'num_agent_steps_sampled': 1000000, 'num_agent_steps_trained': 1000000}",1000000,1000000,1000000,4000,970.205,1000000,4000,970.205,0,8,0,0,4000,"{'cpu_util_percent': 14.950000000000001, 'ram_util_percent': 51.63333333333333}",{},{},{},"{'mean_raw_obs_processing_ms': 1.5763720564960906, 'mean_inference_ms': 1.1400035134446922, 'mean_action_processing_ms': 0.2630564654408566, 'mean_env_wait_ms': 0.15089443774083397, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 10.0, 'episode_reward_min': 10.0, 'episode_reward_mean': 10.0, 'episode_len_mean': 5.475409836065574, 'episode_media': {}, 'episodes_this_iter': 732, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0], 'episode_lengths': [5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 5, 5, 8, 7, 5, 5, 9, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 6, 5, 5, 6, 8, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 6, 6, 5, 6, 6, 5, 5, 6, 5, 5, 6, 5, 5, 6, 5, 5, 6, 6, 6, 5, 5, 6, 5, 5, 5, 6, 5, 5, 5, 9, 6, 5, 5, 5, 5, 5, 6, 5, 6, 5, 6, 5, 5, 6, 7, 6, 5, 5, 5, 8, 5, 7, 6, 6, 6, 5, 5, 5, 9, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 6, 5, 6, 7, 5, 8, 5, 5, 5, 5, 5, 5, 5, 7, 6, 6, 5, 6, 6, 5, 6, 6, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 6, 6, 5, 5, 5, 6, 5, 5, 6, 5, 5, 6, 6, 5, 5, 6, 5, 6, 6, 6, 5, 5, 5, 5, 5, 5, 6, 8, 6, 5, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 5, 6, 5, 5, 8, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 8, 5, 7, 6, 5, 5, 5, 5, 6, 6, 5, 5, 7, 6, 6, 5, 5, 5, 6, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 5, 7, 5, 5, 5, 5, 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 5, 6, 5, 6, 5, 5, 5, 6, 5, 5, 6, 5, 7, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 5, 8, 5, 6, 6, 5, 5, 5, 7, 7, 5, 5, 6, 5, 5, 5, 6, 5, 6, 5, 6, 5, 5, 5, 6, 5, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 6, 6, 5, 7, 5, 6, 5, 5, 5, 6, 5, 8, 5, 6, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 6, 5, 5, 5, 5, 5, 5, 6, 6, 6, 5, 5, 5, 6, 5, 6, 7, 6, 5, 7, 5, 6, 6, 5, 5, 6, 6, 6, 5, 5, 6, 5, 5, 5, 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 6, 5, 7, 7, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 6, 9, 6, 5, 5, 5, 6, 5, 7, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5, 5, 5, 5, 5, 7, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 7, 6, 6, 5, 5, 5, 6, 5, 5, 6, 6, 5, 5, 8, 5, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 5, 6, 5, 5, 8, 6, 5, 5, 7, 6, 6, 5, 6, 11, 5, 6, 5, 5, 5, 5, 6, 5, 6, 6, 7, 7, 5, 6, 5, 5, 5, 6, 5, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 6, 6, 6, 5, 5, 6, 5, 5, 5, 5, 6, 7, 5, 6, 5, 7, 6, 6, 5, 5, 5, 5, 8, 5, 5, 5, 5, 6, 6, 7, 7, 6, 7, 5, 6, 5, 5, 5, 5, 5, 5, 
6, 5, 5, 5, 7, 5, 5, 6, 5, 6, 7, 6, 5, 6, 7, 6, 7, 5, 6, 6, 5, 5, 6, 5, 5, 6, 5, 6, 6, 6, 7, 6, 6, 5, 6, 5, 5, 6, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 7, 7, 9, 5, 7, 5, 5, 6, 5, 5, 7, 6, 5, 5, 5, 5, 5, 6, 5, 8, 5, 6, 6, 5, 5, 6, 5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 5, 5, 5]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.5763720564960906, 'mean_inference_ms': 1.1400035134446922, 'mean_action_processing_ms': 0.2630564654408566, 'mean_env_wait_ms': 0.15089443774083397, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {'ObsPreprocessorConnector_ms': 0.004010741176500998, 'StateBufferConnector_ms': 0.006714577231902242, 'ViewRequirementAgentConnector_ms': 0.08132985380829358}}","{'training_iteration_time_ms': 4180.064, 'sample_time_ms': 500.927, 'load_time_ms': 0.933, 'load_throughput': 4285805.957, 'learn_time_ms': 3671.349, 'learn_throughput': 1089.518, 'synch_weights_time_ms': 6.388}"


[36m(PPO pid=38425)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/mnt/data/Documents/sliding-puzzle/ray_results/20240214-112343/PPO_SlidingPuzzle_a8c17_00000_0_2024-02-14_11-23-44/checkpoint_000000)
[36m(pid=38510)[0m   _np_version_under1p20 = _nlv < LooseVersion("1.20")[32m [repeated 28x across cluster][0m
[36m(pid=38510)[0m   if LooseVersion(__version__) >= LooseVersion("1.17.0"):[32m [repeated 14x across cluster][0m
[36m(pid=38510)[0m   _nlv = LooseVersion(_np_version)[32m [repeated 6x across cluster][0m
[36m(pid=38510)[0m   other = LooseVersion(other)[32m [repeated 6x across cluster][0m
[36m(pid=38510)[0m   if (distutils.version.LooseVersion(tf.__version__) <[32m [repeated 6x across cluster][0m
[36m(pid=38510)[0m   distutils.version.LooseVersion(required_tensorflow_version)):[32m [repeated 6x across cluster][0m
[36m(RolloutWorker pid=38510)[0m   logger.warn([32m [repeated 37x across cluster][0m
[36m(RolloutWorker pid=38510)

In [12]:
# Select the trial that maximises "episode_reward_mean". No scope argument is
# passed, so Tune's default scope decides which reported results are compared.
best_trial = analysis.get_best_trial(metric="episode_reward_mean", mode="max")
print(best_trial)

# Among that trial's checkpoints, take the one with the highest
# "episode_reward_mean".
best_checkpoint = analysis.get_best_checkpoint(
    trial=best_trial,
    mode="max",
    metric="episode_reward_mean",
)
print(best_checkpoint)

print("Done training")

PPO_SlidingPuzzle_a8c17_00000
Checkpoint(filesystem=local, path=/mnt/data/Documents/sliding-puzzle/ray_results/20240214-112343/PPO_SlidingPuzzle_a8c17_00000_0_2024-02-14_11-23-44/checkpoint_000000)
Done training
