# Ray RLlib 강화학습 실험

이 노트북은 Ray RLlib을 사용한 강화학습 연구의 기본 템플릿입니다.

In [1]:
# 필수 라이브러리 import
import ray
from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.algorithms.sac.torch.default_sac_torch_rl_module import DefaultSACTorchRLModule
from ray.rllib.connectors.env_to_module import EnvToModulePipeline
from ray.rllib.connectors.module_to_env import ModuleToEnvPipeline
from ray.rllib.core.columns import Columns
from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleSpec
from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
from ray.rllib.env.single_agent_episode import SingleAgentEpisode
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os
import sys
import torch

PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath("__file__")))
sys.path.append(PROJECT_ROOT)

# 패키지 Util 함수
from utils.utils import * 

print("라이브러리 import 완료")

라이브러리 import 완료


## 1. Ray 초기화

In [2]:
# Ray 초기화
# 로컬 모드로 실행 (디버깅용)
# ray.init(local_mode=True)

# 일반 모드로 실행 (실험용)
ray.init(ignore_reinit_error=True)

print(f"Ray 클러스터 정보: {ray.cluster_resources()}")

2026-01-16 16:44:21,102	INFO worker.py:2007 -- Started a local Ray instance.


Ray 클러스터 정보: {'node:10.254.233.114': 1.0, 'GPU': 1.0, 'CPU': 32.0, 'accelerator_type:G': 1.0, 'memory': 182405512397.0, 'object_store_memory': 78173791027.0, 'node:__internal_head__': 1.0}




## 2. 환경 설정

**참고**: 실제 환경은 별도 프로젝트로 관리되므로, 여기서는 예시로 Gymnasium 환경을 사용합니다.
실제 환경을 사용할 때는 해당 환경의 import 경로를 수정하세요.

In [3]:
ENV_NAME = "horcrux_env/plane-v0"

# 환경 정보 확인
import gymnasium as gym
import horcrux_env
from horcrux_env.envs import PlaneJoyDirWorld

env = gym.make(ENV_NAME)
print(f"환경: {ENV_NAME}")
print(f"관찰 공간: {env.observation_space}")
print(f"행동 공간: {env.action_space}")

ENV_OBS_SPACE = env.observation_space
ENV_ACTION_SPACE = env.action_space


env.close()

환경: horcrux_env/plane-v0
관찰 공간: Box(-inf, inf, (97,), float32)
행동 공간: Box(0.0, 2.7, (14,), float32)


In [4]:
ENV_CONFIG = {
    "forward_reward_weight": 175.0,
    "rotation_reward_weight": 100.0,
    "unhealthy_max_steps": 80.0,
    "healthy_reward": 3.0,
    "healthy_roll_range": (-40,40),
    "terminating_roll_range": (-80,80),
    "rotation_norm_cost_weight": 7.5,
    "termination_reward": 0,
    "gait_params": (30, 30, 40, 40, 0, 1),
    "use_friction_chg": True,
    "joy_input_random": False,
    "use_imu_window": True,
    "use_vels_window": True,
    "ctrl_cost_weight": 0.05,
}

RENDER_ENV_CONFIG = ENV_CONFIG.copy()
RENDER_ENV_CONFIG['render_mode'] = 'rgb_array'
RENDER_ENV_CONFIG['render_camera_name'] = 'ceiling'

# env = gym.make(ENV_NAME, **RENDER_ENV_CONFIG)

from ray.tune.registry import register_env
register_env(ENV_NAME, lambda config: PlaneJoyDirWorld( forward_reward_weight=ENV_CONFIG["forward_reward_weight"], 
                                                     rotation_reward_weight=ENV_CONFIG["rotation_reward_weight"], 
                                                     unhealthy_max_steps=ENV_CONFIG["unhealthy_max_steps"],
                                                     healthy_reward=ENV_CONFIG["healthy_reward"], 
                                                     healthy_roll_range=ENV_CONFIG["healthy_roll_range"],
                                                     terminating_roll_range=ENV_CONFIG["terminating_roll_range"],
                                                     rotation_norm_cost_weight=ENV_CONFIG["rotation_norm_cost_weight"],
                                                     termination_reward=ENV_CONFIG["termination_reward"],
                                                     gait_params=ENV_CONFIG["gait_params"],
                                                     use_friction_chg=ENV_CONFIG["use_friction_chg"],
                                                     joy_input_random=ENV_CONFIG["joy_input_random"],
                                                     use_imu_window=ENV_CONFIG["use_imu_window"],
                                                     ctrl_cost_weight=ENV_CONFIG["ctrl_cost_weight"],
                                                   )
            )

## 3. RLlib 알고리즘 설정

In [5]:
# PPO 알고리즘 설정 (다른 알고리즘으로 변경 가능: DQN, A3C, SAC 등)
config = (
    SACConfig()
    .environment(env=ENV_NAME)
    .training(
        gamma=0.90,
        model={
            "fcnet_hiddens": [512, 512, 512, 512, 512, 512, 48],
            "fcnet_activation": "relu",
            "vf_share_layers": False,
        },
        q_model_config={
            "fcnet_hiddens": [512, 512, 512, 512, 512, 512, 48],
            "fcnet_activation": "relu",
            "vf_share_layers": False,
        },
        policy_model_config={
            "fcnet_hiddens": [512, 512, 512, 512, 512, 512, 48],
            "fcnet_activation": "relu",
            "vf_share_layers": False,
        },
        train_batch_size=16384,
        train_batch_size_per_learner=8192,
        optimization_config={
            "actor_learning_rate": 3e-5,
            "critic_learning_rate": 3e-5,
            "entropy_learning_rate": 3e-5,
        },
        n_step=5,
        target_entropy="auto",
        replay_buffer_config={
            "capacity": 500000,
        },
    )
    .resources(num_gpus=1)  # GPU 사용 시 1로 변경
    .framework("torch")  # "tf2" 또는 "torch"
    .env_runners(
        num_env_runners=16,
        num_envs_per_env_runner=1,
    )
    .learners(
        num_learners=1,
        num_gpus_per_learner=1,
    )
    .rl_module(
        # rl_module_spec= RLModuleSpec(
        #     module_class=DefaultSACTorchRLModule,
        #     observation_space=ENV_OBS_SPACE,
        #     action_space=ENV_ACTION_SPACE,
        # ),
        model_config=DefaultModelConfig(
            fcnet_hiddens=[512, 512, 512, 512, 512, 512, 48],
            fcnet_activation="relu",
            vf_share_layers=False,
        ),
    )
)

print("알고리즘 설정 완료")
print(config)

알고리즘 설정 완료
<ray.rllib.algorithms.sac.sac.SACConfig object at 0x7ce09825d340>


## 4. 알고리즘 인스턴스 생성 및 학습

In [6]:
# 알고리즘 인스턴스 생성
algo = config.build_algo()

print("알고리즘 인스턴스 생성 완료")

`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
[2026-01-16 16:44:25,967 E 201240 201240] core_worker.cc:2223: Actor with class name: 'SingleAgentEnvRunner' and ID: '998510d69e0677e671494c2701000000' has constructor arguments in the object store and max_restarts > 0. If the arguments in the object store go out of scope or are lost, the actor restart

알고리즘 인스턴스 생성 완료


In [7]:
# 학습 실행
NUM_ITERATIONS = 10000  # 학습 반복 횟수

import pprint

for i in range(NUM_ITERATIONS):
    result = algo.train()
    
    if (i + 1) % 150 == 0:
        print(f"Iteration {i + 1}")
        print(algo.save(algo.logdir))

print(algo.save(algo.logdir))
print("학습 완료")

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
[36m(pid=gcs_server)[0m [2026-01-16 16:44:50,192 E 201418 201418] (gcs_server) gcs_server.cc:303: Failed to establish connection to the event+metrics exporter agent. Events and metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[33m(raylet)[0m [2026-01-16 16:44:51,021 E 201580 201580] (raylet) main.cc:1032: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
  return np.nanmean(self._values_to_merge)
[36m(SingleAgentEnvRunner pid=201683)[0m [2026-01-16 16:44:52,220 E 201683 202045] core_worker_process.cc:842: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Ru

Iteration 150
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 3.9343356280005537, 'restore_env_runners': 7.40291943657212e-06, 'training_step': 1.8166025631227602, 'env_runner_sampling_timer': 0.028656448571891816, 'replay_buffer_add_data_timer': 0.013374729179158749, 'replay_buffer_sampling_timer': 0.41186511042906204, 'learner_update_timer': 1.288758144157291, 'replay_buffer_update_prios_timer': 0.041310748443029295, 'synch_weights': 0.007154060171196761, 'synch_env_connectors': 0.0026488670009712223}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.762213893862577e-05), 'get_actions': np.float64(0.00034585558864552593), 'un_batch_to_individual_items': np.float64(1.741324182238926e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.914245606256361e-06), 'normalize_and_clip_actions'

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 300
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.367190215001756, 'restore_env_runners': 7.965527620399371e-06, 'training_step': 2.4836421978514407, 'env_runner_sampling_timer': 0.029944865109064266, 'replay_buffer_add_data_timer': 0.014083505729831813, 'replay_buffer_sampling_timer': 0.7226380662287193, 'learner_update_timer': 1.637499427847033, 'replay_buffer_update_prios_timer': 0.04533022774026904, 'synch_weights': 0.007899559110082919, 'synch_env_connectors': 0.003325148001749767}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.895001297843991e-05), 'get_actions': np.float64(0.0003587355371786767), 'un_batch_to_individual_items': np.float64(1.724786373212798e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.959625553674907e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 450
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.116718219000177, 'restore_env_runners': 7.114627551345621e-06, 'training_step': 1.8693938104518384, 'env_runner_sampling_timer': 0.027826943382206083, 'replay_buffer_add_data_timer': 0.014952442491994589, 'replay_buffer_sampling_timer': 0.7941471476273364, 'learner_update_timer': 0.9506254624993016, 'replay_buffer_update_prios_timer': 0.047866965310749944, 'synch_weights': 0.0071508716612152055, 'synch_env_connectors': 0.0033875089975481387}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8943143609350767e-05), 'get_actions': np.float64(0.00036343046892219294), 'un_batch_to_individual_items': np.float64(1.6920464152353885e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.953842224766959e-06), 'normalize_and_clip_actio

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 600
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.269571757999074, 'restore_env_runners': 7.272529110196047e-06, 'training_step': 1.9921235292194979, 'env_runner_sampling_timer': 0.02707137697041617, 'replay_buffer_add_data_timer': 0.0132402234084293, 'replay_buffer_sampling_timer': 0.41725556536224756, 'learner_update_timer': 1.4439461721187037, 'replay_buffer_update_prios_timer': 0.05466233301955071, 'synch_weights': 0.008072280119959032, 'synch_env_connectors': 0.0036818049993598834}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.854589918289434e-05), 'get_actions': np.float64(0.00034829845728959967), 'un_batch_to_individual_items': np.float64(1.619127777135146e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8303597598710827e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 750
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.79717072800122, 'restore_env_runners': 1.0268771438859403e-05, 'training_step': 2.2860632338089633, 'env_runner_sampling_timer': 0.029467091916631032, 'replay_buffer_add_data_timer': 0.014942802028926963, 'replay_buffer_sampling_timer': 0.8729347236977991, 'learner_update_timer': 1.2798809653010539, 'replay_buffer_update_prios_timer': 0.05411940138972569, 'synch_weights': 0.007537415290717036, 'synch_env_connectors': 0.003399768000235781}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8939757686787645e-05), 'get_actions': np.float64(0.0003606816348004073), 'un_batch_to_individual_items': np.float64(1.672477616208476e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.0751539583869874e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 900
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.73042463100137, 'restore_env_runners': 6.2684471049578864e-06, 'training_step': 2.114628294212416, 'env_runner_sampling_timer': 0.02734593405984924, 'replay_buffer_add_data_timer': 0.014887932839847053, 'replay_buffer_sampling_timer': 0.9215789782379943, 'learner_update_timer': 1.062550068522214, 'replay_buffer_update_prios_timer': 0.053651055458576596, 'synch_weights': 0.006151016888543381, 'synch_env_connectors': 0.003716744999110233}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8382099299308414e-05), 'get_actions': np.float64(0.0003486638875928561), 'un_batch_to_individual_items': np.float64(1.6323987406533794e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.919099574739354e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 1050
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.616917538001871, 'restore_env_runners': 7.434631806972902e-06, 'training_step': 2.1893057619900715, 'env_runner_sampling_timer': 0.02910011046020372, 'replay_buffer_add_data_timer': 0.017557823258612187, 'replay_buffer_sampling_timer': 0.4249065401516782, 'learner_update_timer': 1.623865016017262, 'replay_buffer_update_prios_timer': 0.057409261227840035, 'synch_weights': 0.0066701637103687966, 'synch_env_connectors': 0.0036284570014686324}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8528096203064553e-05), 'get_actions': np.float64(0.00039091870425842065), 'un_batch_to_individual_items': np.float64(1.6653425836175754e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.930965880347645e-06), 'normalize_and_clip_action

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 1200
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.381824937001511, 'restore_env_runners': 8.44711139507126e-06, 'training_step': 2.2191941757070524, 'env_runner_sampling_timer': 0.027715044352589757, 'replay_buffer_add_data_timer': 0.01541851225825667, 'replay_buffer_sampling_timer': 1.0072350011091111, 'learner_update_timer': 1.0742802053082778, 'replay_buffer_update_prios_timer': 0.058405473320963214, 'synch_weights': 0.008186561741968034, 'synch_env_connectors': 0.00466206599958241}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.7696844244881856e-05), 'get_actions': np.float64(0.00034535555596353366), 'un_batch_to_individual_items': np.float64(1.6249267789008643e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.835854894904736e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 1350
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.520108905002417, 'restore_env_runners': 7.004820363363251e-06, 'training_step': 2.8975005858826623, 'env_runner_sampling_timer': 0.027458813170487702, 'replay_buffer_add_data_timer': 0.0170603404225767, 'replay_buffer_sampling_timer': 1.016359733429126, 'learner_update_timer': 1.7348652895572376, 'replay_buffer_update_prios_timer': 0.06372792388054223, 'synch_weights': 0.008222225910903944, 'synch_env_connectors': 0.004691338999691652}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.997588349723043e-05), 'get_actions': np.float64(0.00036089651416334946), 'un_batch_to_individual_items': np.float64(1.7186115833226238e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.02686311965195e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 1500
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.904207904997747, 'restore_env_runners': 6.946300018171314e-06, 'training_step': 2.570221357049231, 'env_runner_sampling_timer': 0.025904738841563812, 'replay_buffer_add_data_timer': 0.019601845281322312, 'replay_buffer_sampling_timer': 0.4204577121606053, 'learner_update_timer': 2.0041911440200058, 'replay_buffer_update_prios_timer': 0.06357453858956433, 'synch_weights': 0.008892692429835734, 'synch_env_connectors': 0.00472712699775002}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8246557098083218e-05), 'get_actions': np.float64(0.00034079136581851606), 'un_batch_to_individual_items': np.float64(1.6480965613833445e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.150293848439318e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 1650
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.330007694999949, 'restore_env_runners': 6.3482498444500375e-06, 'training_step': 2.6617879981868464, 'env_runner_sampling_timer': 0.02732705838832771, 'replay_buffer_add_data_timer': 0.017116140870639354, 'replay_buffer_sampling_timer': 0.42562405228985883, 'learner_update_timer': 2.0893070634805917, 'replay_buffer_update_prios_timer': 0.06482154049041128, 'synch_weights': 0.00903931524964719, 'synch_env_connectors': 0.004726389997813385}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(3.015073004897148e-05), 'get_actions': np.float64(0.00035731235104730454), 'un_batch_to_individual_items': np.float64(1.7046444051626103e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.0413385104602192e-06), 'normalize_and_clip_actions

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 1800
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.020193600001221, 'restore_env_runners': 7.75304128183052e-06, 'training_step': 1.898047262250475, 'env_runner_sampling_timer': 0.030100510400261554, 'replay_buffer_add_data_timer': 0.01958408956965286, 'replay_buffer_sampling_timer': 0.42657661764085786, 'learner_update_timer': 1.320355250143257, 'replay_buffer_update_prios_timer': 0.06362537488268571, 'synch_weights': 0.009142689050167975, 'synch_env_connectors': 0.00486966999960714}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9112618310220232e-05), 'get_actions': np.float64(0.00033699161253276195), 'un_batch_to_individual_items': np.float64(1.6521006316317654e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.002478481527114e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 1950
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.144225486001233, 'restore_env_runners': 6.603739348065574e-06, 'training_step': 1.9091679324921278, 'env_runner_sampling_timer': 0.02778794044024835, 'replay_buffer_add_data_timer': 0.020861482821710523, 'replay_buffer_sampling_timer': 0.433351351728561, 'learner_update_timer': 1.3269131560903407, 'replay_buffer_update_prios_timer': 0.06332057227245969, 'synch_weights': 0.00914854051956354, 'synch_env_connectors': 0.0052875639994454104}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.847791504233694e-05), 'get_actions': np.float64(0.0003487190532642988), 'un_batch_to_individual_items': np.float64(1.6713597168333494e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.997471739572301e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 2100
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.720079579001322, 'restore_env_runners': 7.506139372708276e-06, 'training_step': 2.444991745120606, 'env_runner_sampling_timer': 0.03572002391174465, 'replay_buffer_add_data_timer': 0.018372355510364285, 'replay_buffer_sampling_timer': 1.1895693571099764, 'learner_update_timer': 1.0979118156188996, 'replay_buffer_update_prios_timer': 0.06505842526967172, 'synch_weights': 0.009313942762601073, 'synch_env_connectors': 0.005684101997758262}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9781590497756115e-05), 'get_actions': np.float64(0.0003459530226431832), 'un_batch_to_individual_items': np.float64(1.6960330304236026e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.991641898519773e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 2250
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.22807765700054, 'restore_env_runners': 7.128060751711019e-06, 'training_step': 3.313877260819318, 'env_runner_sampling_timer': 0.02560982267965301, 'replay_buffer_add_data_timer': 0.019718263581817153, 'replay_buffer_sampling_timer': 1.2357320479686678, 'learner_update_timer': 1.926490666887803, 'replay_buffer_update_prios_timer': 0.06674187466804142, 'synch_weights': 0.010637665218200709, 'synch_env_connectors': 0.005804405998787843}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.811641491278293e-05), 'get_actions': np.float64(0.000336267458029424), 'un_batch_to_individual_items': np.float64(1.6215276534440278e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8831322696057656e-06), 'normalize_and_clip_actions': np.

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 2400
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.1988577419979265, 'restore_env_runners': 1.416940984199755e-05, 'training_step': 1.6793576771206427, 'env_runner_sampling_timer': 0.02788811411883216, 'replay_buffer_add_data_timer': 0.02294278887937253, 'replay_buffer_sampling_timer': 0.44212250782446066, 'learner_update_timer': 1.0795080485012294, 'replay_buffer_update_prios_timer': 0.07013366028433665, 'synch_weights': 0.008712144412347697, 'synch_env_connectors': 0.006508457001473289}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.865236781666146e-05), 'get_actions': np.float64(0.00034558843740390777), 'un_batch_to_individual_items': np.float64(1.6939147350626853e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9646182157434488e-06), 'normalize_and_clip_actions

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 2550
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.221050508000189, 'restore_env_runners': 8.377415797440335e-06, 'training_step': 2.6418724620417198, 'env_runner_sampling_timer': 0.027386430045371526, 'replay_buffer_add_data_timer': 0.02331520584215468, 'replay_buffer_sampling_timer': 1.3707639703759198, 'learner_update_timer': 1.1120438540108444, 'replay_buffer_update_prios_timer': 0.07068569367220334, 'synch_weights': 0.009050315014319495, 'synch_env_connectors': 0.008542233998014126}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8398895230435445e-05), 'get_actions': np.float64(0.0003360893831684809), 'un_batch_to_individual_items': np.float64(1.6174386557084978e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9077360304678222e-06), 'normalize_and_clip_actions'

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 2700
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.190786729006504, 'restore_env_runners': 6.801421623094939e-06, 'training_step': 2.6140825020262852, 'env_runner_sampling_timer': 0.02732889052014798, 'replay_buffer_add_data_timer': 0.021293351351778254, 'replay_buffer_sampling_timer': 1.381048910294703, 'learner_update_timer': 1.0765047442664946, 'replay_buffer_update_prios_timer': 0.0691446047109639, 'synch_weights': 0.009523848760436522, 'synch_env_connectors': 0.00649784499546513}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(3.0006160422865832e-05), 'get_actions': np.float64(0.0003511398397256819), 'un_batch_to_individual_items': np.float64(1.720417470244484e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.942239532909464e-06), 'normalize_and_clip_actions': np.

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 2850
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.417461122000532, 'restore_env_runners': 1.1170681100338697e-05, 'training_step': 2.7242792198494135, 'env_runner_sampling_timer': 0.028073854564718205, 'replay_buffer_add_data_timer': 0.022408236379924347, 'replay_buffer_sampling_timer': 0.4289066658212687, 'learner_update_timer': 2.1394492311832436, 'replay_buffer_update_prios_timer': 0.06794494550944363, 'synch_weights': 0.008555024451779901, 'synch_env_connectors': 0.007305682003789116}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9578146137972526e-05), 'get_actions': np.float64(0.000331948100287009), 'un_batch_to_individual_items': np.float64(1.574176937281899e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8954120299966437e-06), 'normalize_and_clip_actions'

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 3000
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.781860266994045, 'restore_env_runners': 7.554921321570873e-06, 'training_step': 2.9950429413524398, 'env_runner_sampling_timer': 0.02620766032479878, 'replay_buffer_add_data_timer': 0.02313707515044371, 'replay_buffer_sampling_timer': 0.4269202999836125, 'learner_update_timer': 2.4114796423550433, 'replay_buffer_update_prios_timer': 0.06972514770066482, 'synch_weights': 0.009213658661174124, 'synch_env_connectors': 0.006721165002090856}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8388306428234674e-05), 'get_actions': np.float64(0.00033839994782275704), 'un_batch_to_individual_items': np.float64(1.643591349203384e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9397537391760897e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 3150
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.797612508999009, 'restore_env_runners': 7.723235321464018e-06, 'training_step': 2.8575729502606557, 'env_runner_sampling_timer': 0.03394109727385512, 'replay_buffer_add_data_timer': 0.0257934867595759, 'replay_buffer_sampling_timer': 0.44052884795055436, 'learner_update_timer': 2.2433613280359714, 'replay_buffer_update_prios_timer': 0.07287917593617749, 'synch_weights': 0.009728316998007358, 'synch_env_connectors': 0.007885699997132178}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9345572808495888e-05), 'get_actions': np.float64(0.00034331658300200776), 'un_batch_to_individual_items': np.float64(1.647208951261528e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9532784055448424e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 3300
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.758990841000923, 'restore_env_runners': 6.063067630748265e-06, 'training_step': 2.987684067205555, 'env_runner_sampling_timer': 0.02907607381392154, 'replay_buffer_add_data_timer': 0.024863878905161983, 'replay_buffer_sampling_timer': 1.4967543996532187, 'learner_update_timer': 1.3228940271692409, 'replay_buffer_update_prios_timer': 0.07058552075955958, 'synch_weights': 0.009820347725108149, 'synch_env_connectors': 0.008160320998285897}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8250951054380322e-05), 'get_actions': np.float64(0.00036126959052179265), 'un_batch_to_individual_items': np.float64(1.635138431315418e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9374177818476728e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 3450
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.881891719996929, 'restore_env_runners': 7.860210462240503e-06, 'training_step': 2.8973149344753617, 'env_runner_sampling_timer': 0.031181210664508397, 'replay_buffer_add_data_timer': 0.027652694044663802, 'replay_buffer_sampling_timer': 0.4525313248790189, 'learner_update_timer': 2.267429104214025, 'replay_buffer_update_prios_timer': 0.07275038163585124, 'synch_weights': 0.009840035581073606, 'synch_env_connectors': 0.007087296005920507}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.777971109649225e-05), 'get_actions': np.float64(0.0003315648401045442), 'un_batch_to_individual_items': np.float64(1.568868074062285e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9043267356038631e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 3600
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.682500950999383, 'restore_env_runners': 6.110045142122545e-06, 'training_step': 2.860294632999721, 'env_runner_sampling_timer': 0.03344647541111044, 'replay_buffer_add_data_timer': 0.029898448873937014, 'replay_buffer_sampling_timer': 0.4574938964990724, 'learner_update_timer': 2.2191904941023677, 'replay_buffer_update_prios_timer': 0.07469603009463754, 'synch_weights': 0.010868278986527003, 'synch_env_connectors': 0.007419809997372795}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.917244872644206e-05), 'get_actions': np.float64(0.0003502158580705937), 'un_batch_to_individual_items': np.float64(1.673960590954112e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9124764012807984e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 3750
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.653800993000914, 'restore_env_runners': 6.550961406901478e-06, 'training_step': 2.819659580245061, 'env_runner_sampling_timer': 0.03050946172508702, 'replay_buffer_add_data_timer': 0.027809062139931486, 'replay_buffer_sampling_timer': 1.5380045681483896, 'learner_update_timer': 1.1044197706331034, 'replay_buffer_update_prios_timer': 0.07289309681895248, 'synch_weights': 0.011131732547437422, 'synch_env_connectors': 0.007992576996912248}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.899778579656514e-05), 'get_actions': np.float64(0.0003537337222904597), 'un_batch_to_individual_items': np.float64(1.64560081026737e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.090582845663398e-06), 'normalize_and_clip_actions': np.

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 3900
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.958257359001436, 'restore_env_runners': 7.322616656892933e-06, 'training_step': 2.9411136716994224, 'env_runner_sampling_timer': 0.034425699710554904, 'replay_buffer_add_data_timer': 0.027816288735339183, 'replay_buffer_sampling_timer': 0.4558234428137803, 'learner_update_timer': 2.305839753213586, 'replay_buffer_update_prios_timer': 0.07119840362211106, 'synch_weights': 0.010592427647279691, 'synch_env_connectors': 0.008570114994654432}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8125840859489485e-05), 'get_actions': np.float64(0.0003380767238898422), 'un_batch_to_individual_items': np.float64(1.6286464940163863e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9006685807862526e-06), 'normalize_and_clip_actions'

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 4050
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.948283099998662, 'restore_env_runners': 8.343951049027965e-06, 'training_step': 2.7999473302817934, 'env_runner_sampling_timer': 0.035940805572317915, 'replay_buffer_add_data_timer': 0.03187115139451635, 'replay_buffer_sampling_timer': 0.45069966155868313, 'learner_update_timer': 2.1641091806009354, 'replay_buffer_update_prios_timer': 0.07070761380200566, 'synch_weights': 0.010908477071279777, 'synch_env_connectors': 0.010562138995737769}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.912209770185454e-05), 'get_actions': np.float64(0.0003633818243695266), 'un_batch_to_individual_items': np.float64(1.7426580304999328e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.048167202190709e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 4200
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.981702802993823, 'restore_env_runners': 8.245609060395508e-06, 'training_step': 3.086861332501285, 'env_runner_sampling_timer': 0.03672139569884166, 'replay_buffer_add_data_timer': 0.034266321109098496, 'replay_buffer_sampling_timer': 0.45151999246736524, 'learner_update_timer': 2.4470209931118005, 'replay_buffer_update_prios_timer': 0.07176216539337474, 'synch_weights': 0.011006962673200178, 'synch_env_connectors': 0.008406335997278802}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.784722064713139e-05), 'get_actions': np.float64(0.0003564810729574379), 'un_batch_to_individual_items': np.float64(1.6533186249594055e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.937327389473971e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 4350
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.015061309000885, 'restore_env_runners': 1.0026211966760457e-05, 'training_step': 3.117306382200404, 'env_runner_sampling_timer': 0.033472955131219354, 'replay_buffer_add_data_timer': 0.03152674006560119, 'replay_buffer_sampling_timer': 1.5654596412165118, 'learner_update_timer': 1.3653261398847099, 'replay_buffer_update_prios_timer': 0.0739393786087021, 'synch_weights': 0.01109473720214737, 'synch_env_connectors': 0.009060145996045321}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.859644849825846e-05), 'get_actions': np.float64(0.000334940884186826), 'un_batch_to_individual_items': np.float64(1.701298682991059e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8992767563853362e-06), 'normalize_and_clip_actions': np.

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 4500
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.910890658000426, 'restore_env_runners': 1.3471617785398849e-05, 'training_step': 2.814375322323831, 'env_runner_sampling_timer': 0.03219659112182854, 'replay_buffer_add_data_timer': 0.03023890200223832, 'replay_buffer_sampling_timer': 1.5649634190829238, 'learner_update_timer': 1.0678533479721228, 'replay_buffer_update_prios_timer': 0.06904192011039413, 'synch_weights': 0.013706836267811013, 'synch_env_connectors': 0.009865519001323264}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.885202050815973e-05), 'get_actions': np.float64(0.0003419932099852969), 'un_batch_to_individual_items': np.float64(1.7040670619180816e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9572520259438605e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 4650
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.827408295001078, 'restore_env_runners': 6.758354138582945e-06, 'training_step': 1.9874111261612415, 'env_runner_sampling_timer': 0.03402576225576922, 'replay_buffer_add_data_timer': 0.03216736319373013, 'replay_buffer_sampling_timer': 0.46649149802869944, 'learner_update_timer': 1.3402878605149453, 'replay_buffer_update_prios_timer': 0.06790946127104688, 'synch_weights': 0.010885475287141162, 'synch_env_connectors': 0.006338493003568146}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.98172568788363e-05), 'get_actions': np.float64(0.00036129648953463027), 'un_batch_to_individual_items': np.float64(1.729117335856397e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.0094878516282122e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 4800
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.052589906001231, 'restore_env_runners': 6.980237158131786e-06, 'training_step': 3.1213097520319573, 'env_runner_sampling_timer': 0.030140620921447408, 'replay_buffer_add_data_timer': 0.029507348998668022, 'replay_buffer_sampling_timer': 0.4502497544213839, 'learner_update_timer': 2.4971391303839483, 'replay_buffer_update_prios_timer': 0.06947154208319262, 'synch_weights': 0.009471745936389197, 'synch_env_connectors': 0.006409660003555473}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8729151597747345e-05), 'get_actions': np.float64(0.00033249524070435156), 'un_batch_to_individual_items': np.float64(1.6519134068832247e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9379691410186725e-06), 'normalize_and_clip_action

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 4950
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.159159600996645, 'restore_env_runners': 6.627770198974758e-06, 'training_step': 3.174721983476265, 'env_runner_sampling_timer': 0.03300973011726455, 'replay_buffer_add_data_timer': 0.0303602598526777, 'replay_buffer_sampling_timer': 0.45784956191899256, 'learner_update_timer': 2.5365995598297744, 'replay_buffer_update_prios_timer': 0.07223629894120677, 'synch_weights': 0.010720109262692859, 'synch_env_connectors': 0.006627443006436806}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9395981823352606e-05), 'get_actions': np.float64(0.000354230832570924), 'un_batch_to_individual_items': np.float64(1.665708876405079e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.1685207088176365e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 5100
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.906034494997584, 'restore_env_runners': 7.588120133732445e-06, 'training_step': 2.9541728307461015, 'env_runner_sampling_timer': 0.03240932766057086, 'replay_buffer_add_data_timer': 0.02762817151284253, 'replay_buffer_sampling_timer': 0.4551895319503819, 'learner_update_timer': 2.3243940748286698, 'replay_buffer_update_prios_timer': 0.0710020295551658, 'synch_weights': 0.01067449199355906, 'synch_env_connectors': 0.007917489994724747}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.937006250379105e-05), 'get_actions': np.float64(0.00034155699368761366), 'un_batch_to_individual_items': np.float64(1.657452352620645e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9533454520421472e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 5250
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.019028875998629, 'restore_env_runners': 7.1032395499059935e-06, 'training_step': 2.8563404783567963, 'env_runner_sampling_timer': 0.03297591593349353, 'replay_buffer_add_data_timer': 0.03296816786431009, 'replay_buffer_sampling_timer': 1.5655037928580713, 'learner_update_timer': 1.10494314999014, 'replay_buffer_update_prios_timer': 0.07212932887683564, 'synch_weights': 0.011839853871133527, 'synch_env_connectors': 0.007305125996936113}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9169648717240853e-05), 'get_actions': np.float64(0.00034096451807190094), 'un_batch_to_individual_items': np.float64(1.670094771966119e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9401920048787995e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 5400
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.163651151000522, 'restore_env_runners': 9.049693544511684e-06, 'training_step': 2.9210616143915105, 'env_runner_sampling_timer': 0.032652840627488335, 'replay_buffer_add_data_timer': 0.032034733243854134, 'replay_buffer_sampling_timer': 1.6198148647940251, 'learner_update_timer': 1.1181114683909255, 'replay_buffer_update_prios_timer': 0.07161764887307072, 'synch_weights': 0.011484765448694815, 'synch_env_connectors': 0.011515468002471607}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(3.1035134075576105e-05), 'get_actions': np.float64(0.000338839647428187), 'un_batch_to_individual_items': np.float64(1.658255307516421e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.913647335366734e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 5550
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.74532576600177, 'restore_env_runners': 7.84901887527667e-06, 'training_step': 2.871996046438944, 'env_runner_sampling_timer': 0.034657478026347235, 'replay_buffer_add_data_timer': 0.03182002999245014, 'replay_buffer_sampling_timer': 1.5972836382736568, 'learner_update_timer': 1.081947742962293, 'replay_buffer_update_prios_timer': 0.07763234570025815, 'synch_weights': 0.012626651259997743, 'synch_env_connectors': 0.009602979000192136}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9535630841858055e-05), 'get_actions': np.float64(0.00035215656013984733), 'un_batch_to_individual_items': np.float64(1.6967180594402912e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.977951329845159e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 5700
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.830592170997988, 'restore_env_runners': 8.31091107102111e-06, 'training_step': 2.9282480797125023, 'env_runner_sampling_timer': 0.03605359012239205, 'replay_buffer_add_data_timer': 0.0328051305077679, 'replay_buffer_sampling_timer': 1.6174600610612107, 'learner_update_timer': 1.1187807945071109, 'replay_buffer_update_prios_timer': 0.07211800625947944, 'synch_weights': 0.013928419730873429, 'synch_env_connectors': 0.009276205993955955}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9104585828522047e-05), 'get_actions': np.float64(0.0003477212577950757), 'un_batch_to_individual_items': np.float64(1.6534020845505817e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9307200462892318e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 5850
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.917232165003952, 'restore_env_runners': 7.5499094236874955e-06, 'training_step': 2.955596855688564, 'env_runner_sampling_timer': 0.032099255743960386, 'replay_buffer_add_data_timer': 0.03378316955451737, 'replay_buffer_sampling_timer': 0.45754850245721174, 'learner_update_timer': 2.316111203287146, 'replay_buffer_update_prios_timer': 0.06967669778881827, 'synch_weights': 0.011759263505518902, 'synch_env_connectors': 0.01032310799928382}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8725742112064902e-05), 'get_actions': np.float64(0.00034825162308216556), 'un_batch_to_individual_items': np.float64(1.62622987234028e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.925235112039072e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 6000
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.036203982002917, 'restore_env_runners': 8.697803714312612e-06, 'training_step': 3.2410568526795394, 'env_runner_sampling_timer': 0.037292840624650124, 'replay_buffer_add_data_timer': 0.0353520222032239, 'replay_buffer_sampling_timer': 0.46310772281438406, 'learner_update_timer': 2.5855581463331325, 'replay_buffer_update_prios_timer': 0.06842774028948043, 'synch_weights': 0.012736192420998122, 'synch_env_connectors': 0.010210779000772163}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.901716604584877e-05), 'get_actions': np.float64(0.00034242543701334754), 'un_batch_to_individual_items': np.float64(1.7125833157439445e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.3520683547452883e-06), 'normalize_and_clip_actions'

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 6150
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.141231638997851, 'restore_env_runners': 7.927925616968422e-06, 'training_step': 3.1760459406097654, 'env_runner_sampling_timer': 0.03309395281052275, 'replay_buffer_add_data_timer': 0.0349453791873384, 'replay_buffer_sampling_timer': 1.6323057568989314, 'learner_update_timer': 1.3536892305067159, 'replay_buffer_update_prios_timer': 0.07174578932936128, 'synch_weights': 0.012905890836991602, 'synch_env_connectors': 0.00997682200249983}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9337796562810735e-05), 'get_actions': np.float64(0.00034040937363805036), 'un_batch_to_individual_items': np.float64(1.6363691134258513e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.909439133136957e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 6300
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.114245902004768, 'restore_env_runners': 8.016895953915082e-06, 'training_step': 3.1602222124480615, 'env_runner_sampling_timer': 0.03376593937646249, 'replay_buffer_add_data_timer': 0.03471070136925846, 'replay_buffer_sampling_timer': 1.6358004739729948, 'learner_update_timer': 1.3384037509848714, 'replay_buffer_update_prios_timer': 0.0700687738427223, 'synch_weights': 0.01101338606698846, 'synch_env_connectors': 0.010907916002906859}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9999047811031022e-05), 'get_actions': np.float64(0.0003554353313918741), 'un_batch_to_individual_items': np.float64(1.706284759580864e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.298537922701696e-06), 'normalize_and_clip_actions': np.

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 6450
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.864457108000352, 'restore_env_runners': 8.416411801590585e-06, 'training_step': 1.725352703567696, 'env_runner_sampling_timer': 0.03576440655109764, 'replay_buffer_add_data_timer': 0.03486739604020841, 'replay_buffer_sampling_timer': 0.4717724992021249, 'learner_update_timer': 1.06269156182585, 'replay_buffer_update_prios_timer': 0.07135849236059584, 'synch_weights': 0.013342433010984677, 'synch_env_connectors': 0.010120656006620266}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9105411505953024e-05), 'get_actions': np.float64(0.00034667485210672003), 'un_batch_to_individual_items': np.float64(1.6545790284227007e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.934001220018824e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 6600
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.998772857004951, 'restore_env_runners': 9.023481616168282e-06, 'training_step': 3.0017181349206656, 'env_runner_sampling_timer': 0.03263171709033486, 'replay_buffer_add_data_timer': 0.0329989323503105, 'replay_buffer_sampling_timer': 0.459839683017708, 'learner_update_timer': 2.3553481911303242, 'replay_buffer_update_prios_timer': 0.07418095522967634, 'synch_weights': 0.01081161472677195, 'synch_env_connectors': 0.007206989001133479}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.7965246718792237e-05), 'get_actions': np.float64(0.0003392535980166587), 'un_batch_to_individual_items': np.float64(1.6198721476284745e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9002983411662512e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 6750
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.050692461998551, 'restore_env_runners': 9.190694327116945e-06, 'training_step': 3.0117349647996163, 'env_runner_sampling_timer': 0.03416022391211299, 'replay_buffer_add_data_timer': 0.03357202343686368, 'replay_buffer_sampling_timer': 0.46372551910215404, 'learner_update_timer': 2.3577893087422854, 'replay_buffer_update_prios_timer': 0.07428017047583126, 'synch_weights': 0.011204621891447459, 'synch_env_connectors': 0.008022992995392997}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8866798768971537e-05), 'get_actions': np.float64(0.0003416296948105027), 'un_batch_to_individual_items': np.float64(1.6518265791850596e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9312260505685684e-06), 'normalize_and_clip_actions'

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 6900
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.617055896000238, 'restore_env_runners': 7.79601847170852e-06, 'training_step': 1.7598506895677566, 'env_runner_sampling_timer': 0.034482217989498166, 'replay_buffer_add_data_timer': 0.036541268551154645, 'replay_buffer_sampling_timer': 0.4744729443851247, 'learner_update_timer': 1.0923710507975921, 'replay_buffer_update_prios_timer': 0.07457015229410899, 'synch_weights': 0.010764258310373408, 'synch_env_connectors': 0.007749691998469643}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(3.0064844201645962e-05), 'get_actions': np.float64(0.00035072872514025407), 'un_batch_to_individual_items': np.float64(1.6905843664264973e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9818051406731136e-06), 'normalize_and_clip_actions

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 7050
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.760562563002168, 'restore_env_runners': 8.915285507100634e-06, 'training_step': 2.892488979949485, 'env_runner_sampling_timer': 0.0336832178903569, 'replay_buffer_add_data_timer': 0.035849297397362534, 'replay_buffer_sampling_timer': 1.6211342800019337, 'learner_update_timer': 1.0806872959286558, 'replay_buffer_update_prios_timer': 0.07388171072016121, 'synch_weights': 0.010633507474631187, 'synch_env_connectors': 0.007624377001775429}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9408538082012012e-05), 'get_actions': np.float64(0.0003405401939269247), 'un_batch_to_individual_items': np.float64(1.6834035525699958e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.973271220006152e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 7200
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.630281990997901, 'restore_env_runners': 7.147229916881769e-06, 'training_step': 1.746308861575162, 'env_runner_sampling_timer': 0.034514347468939374, 'replay_buffer_add_data_timer': 0.03572832951096643, 'replay_buffer_sampling_timer': 0.4642020578181109, 'learner_update_timer': 1.09454637703333, 'replay_buffer_update_prios_timer': 0.06989456858762423, 'synch_weights': 0.010382489485709813, 'synch_env_connectors': 0.009383372998854611}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9954190215386355e-05), 'get_actions': np.float64(0.0003480531587517782), 'un_batch_to_individual_items': np.float64(1.6722277549912744e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.0175087009156184e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 7350
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 4.6209357639963855, 'restore_env_runners': 7.751112862024455e-06, 'training_step': 1.7479389005558914, 'env_runner_sampling_timer': 0.031166193047974956, 'replay_buffer_add_data_timer': 0.03594563750419184, 'replay_buffer_sampling_timer': 0.46437748842770815, 'learner_update_timer': 1.1006942959435402, 'replay_buffer_update_prios_timer': 0.06975225150541518, 'synch_weights': 0.010369134581051185, 'synch_env_connectors': 0.007864929997595027}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.7718095683489995e-05), 'get_actions': np.float64(0.00033402533488656363), 'un_batch_to_individual_items': np.float64(1.6330041114042148e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8825403190129825e-06), 'normalize_and_clip_actio

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 7500
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.113524631000473, 'restore_env_runners': 6.335767902783119e-06, 'training_step': 2.9112120538576707, 'env_runner_sampling_timer': 0.03396473633220012, 'replay_buffer_add_data_timer': 0.0336288420670462, 'replay_buffer_sampling_timer': 1.6327660682013085, 'learner_update_timer': 1.0909565006888442, 'replay_buffer_update_prios_timer': 0.07241690343624214, 'synch_weights': 0.010095918097140385, 'synch_env_connectors': 0.007480934000341222}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8114283981961748e-05), 'get_actions': np.float64(0.0003318149549428298), 'un_batch_to_individual_items': np.float64(1.6433510483481007e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9386577720235254e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 7650
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.010625303999404, 'restore_env_runners': 7.533332245657221e-06, 'training_step': 2.8686986888096726, 'env_runner_sampling_timer': 0.03455073111384991, 'replay_buffer_add_data_timer': 0.0345929161109234, 'replay_buffer_sampling_timer': 1.6011193904712127, 'learner_update_timer': 1.0762295043183985, 'replay_buffer_update_prios_timer': 0.07043464454371132, 'synch_weights': 0.015608453227614517, 'synch_env_connectors': 0.011670305997540709}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9220793677405507e-05), 'get_actions': np.float64(0.00033695064621840906), 'un_batch_to_individual_items': np.float64(1.642999007012656e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.1659124005656325e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 7800
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.108347376000893, 'restore_env_runners': 8.219610317610203e-06, 'training_step': 2.9022225231373158, 'env_runner_sampling_timer': 0.034446941362330105, 'replay_buffer_add_data_timer': 0.03494589221649221, 'replay_buffer_sampling_timer': 1.6430120620142408, 'learner_update_timer': 1.0685715550692112, 'replay_buffer_update_prios_timer': 0.07206034423019446, 'synch_weights': 0.012049250261770793, 'synch_env_connectors': 0.011308663997624535}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8904932431585458e-05), 'get_actions': np.float64(0.0003395682101162141), 'un_batch_to_individual_items': np.float64(1.67453976034163e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9205506903255854e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 7950
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.084615953004686, 'restore_env_runners': 8.785500103840604e-06, 'training_step': 3.155631775681541, 'env_runner_sampling_timer': 0.03376963268943655, 'replay_buffer_add_data_timer': 0.03555969711080252, 'replay_buffer_sampling_timer': 1.6560205195737216, 'learner_update_timer': 1.3109948888122016, 'replay_buffer_update_prios_timer': 0.07029536678921432, 'synch_weights': 0.012285214149887905, 'synch_env_connectors': 0.011725462005415466}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.909578631320108e-05), 'get_actions': np.float64(0.00035389933806434813), 'un_batch_to_individual_items': np.float64(1.6710166666146925e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8819826337985453e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 8100
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.16508443299972, 'restore_env_runners': 7.471811841242015e-06, 'training_step': 3.2036302180815253, 'env_runner_sampling_timer': 0.03641833834430144, 'replay_buffer_add_data_timer': 0.03892004085493682, 'replay_buffer_sampling_timer': 1.6582866756221484, 'learner_update_timer': 1.350340692551763, 'replay_buffer_update_prios_timer': 0.0688198130109231, 'synch_weights': 0.013356966180654127, 'synch_env_connectors': 0.012126394001825247}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8850598038925073e-05), 'get_actions': np.float64(0.00033973866739486313), 'un_batch_to_individual_items': np.float64(1.6379733766210394e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9114756245528405e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 8250
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.899231930990936, 'restore_env_runners': 7.288709457498044e-06, 'training_step': 2.9624295714545585, 'env_runner_sampling_timer': 0.03730631455924595, 'replay_buffer_add_data_timer': 0.04177561966716894, 'replay_buffer_sampling_timer': 1.673628423474438, 'learner_update_timer': 1.0920298542358795, 'replay_buffer_update_prios_timer': 0.07009053128102095, 'synch_weights': 0.011345361231506102, 'synch_env_connectors': 0.012038047003443353}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8992252013872867e-05), 'get_actions': np.float64(0.0003393673042597254), 'un_batch_to_individual_items': np.float64(1.6844818832263546e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.93610980493881e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 8400
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.082383335990016, 'restore_env_runners': 8.236514695454389e-06, 'training_step': 3.3020614927522547, 'env_runner_sampling_timer': 0.03672807435767026, 'replay_buffer_add_data_timer': 0.03710999145536334, 'replay_buffer_sampling_timer': 0.4623251998562773, 'learner_update_timer': 2.645377675022319, 'replay_buffer_update_prios_timer': 0.07172163865761831, 'synch_weights': 0.012259792160621145, 'synch_env_connectors': 0.012128503003623337}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9097616125999456e-05), 'get_actions': np.float64(0.00034068822860508854), 'un_batch_to_individual_items': np.float64(1.7201935729347028e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9497961167512495e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 8550
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 5.021215782995569, 'restore_env_runners': 9.427520271856338e-06, 'training_step': 2.9895114389709487, 'env_runner_sampling_timer': 0.03334322767725098, 'replay_buffer_add_data_timer': 0.03606441556083155, 'replay_buffer_sampling_timer': 0.46049787755328, 'learner_update_timer': 2.3369490660401064, 'replay_buffer_update_prios_timer': 0.07262990009883652, 'synch_weights': 0.012348877008334966, 'synch_env_connectors': 0.0121949339954881}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8265977487053052e-05), 'get_actions': np.float64(0.0003285287418481705), 'un_batch_to_individual_items': np.float64(1.6227148102830505e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.0729000957592457e-06), 'normalize_and_clip_actions': np.

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 8700
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.052364945993759, 'restore_env_runners': 7.863597566029058e-06, 'training_step': 3.033411774830747, 'env_runner_sampling_timer': 0.03701038559025619, 'replay_buffer_add_data_timer': 0.037224125292705135, 'replay_buffer_sampling_timer': 0.46701284938899335, 'learner_update_timer': 2.3675396022466884, 'replay_buffer_update_prios_timer': 0.07372809246444376, 'synch_weights': 0.013316069274296752, 'synch_env_connectors': 0.012292247993173078}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.847008294170402e-05), 'get_actions': np.float64(0.0003389027996512723), 'un_batch_to_individual_items': np.float64(1.6731798848878557e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8974899413447474e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 8850
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.297504533009487, 'restore_env_runners': 7.661827839910985e-06, 'training_step': 2.9973793374595696, 'env_runner_sampling_timer': 0.03708806118826033, 'replay_buffer_add_data_timer': 0.04089144600337022, 'replay_buffer_sampling_timer': 1.6654805951911837, 'learner_update_timer': 1.1325006550476246, 'replay_buffer_update_prios_timer': 0.06872470667789457, 'synch_weights': 0.01412951681893901, 'synch_env_connectors': 0.012150559996371157}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.898999699308556e-05), 'get_actions': np.float64(0.00033484837122834413), 'un_batch_to_individual_items': np.float64(1.6004367845054107e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8776778381977556e-06), 'normalize_and_clip_actions': 

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 9000
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.278334228991298, 'restore_env_runners': 7.748118368908763e-06, 'training_step': 3.247627992863272, 'env_runner_sampling_timer': 0.038385708246787546, 'replay_buffer_add_data_timer': 0.04373712657601572, 'replay_buffer_sampling_timer': 1.65433706274227, 'learner_update_timer': 1.3884470906517525, 'replay_buffer_update_prios_timer': 0.07068540424574166, 'synch_weights': 0.013078473213972756, 'synch_env_connectors': 0.012191376998089254}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.7869637895411552e-05), 'get_actions': np.float64(0.0003323885973212158), 'un_batch_to_individual_items': np.float64(1.6035475964534785e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8654298727725151e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 9150
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.39844462700421, 'restore_env_runners': 1.496551267337054e-05, 'training_step': 3.3393323246105866, 'env_runner_sampling_timer': 0.0445009606781241, 'replay_buffer_add_data_timer': 0.05013406855694484, 'replay_buffer_sampling_timer': 0.4699341736672796, 'learner_update_timer': 2.652932106108347, 'replay_buffer_update_prios_timer': 0.07061082846514183, 'synch_weights': 0.012081765269831521, 'synch_env_connectors': 0.01388924999628216}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(3.025476800964723e-05), 'get_actions': np.float64(0.00033862002288865764), 'un_batch_to_individual_items': np.float64(1.671290040365997e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.925009454367191e-06), 'normalize_and_clip_actions': np.fl

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 9300
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.209961795990239, 'restore_env_runners': 8.097700774669647e-06, 'training_step': 2.967281445849658, 'env_runner_sampling_timer': 0.036987821016082306, 'replay_buffer_add_data_timer': 0.03729965269667446, 'replay_buffer_sampling_timer': 1.6639599945921508, 'learner_update_timer': 1.105959101319895, 'replay_buffer_update_prios_timer': 0.07202783151064068, 'synch_weights': 0.013021676815405954, 'synch_env_connectors': 0.012061542001902126}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.789176578926106e-05), 'get_actions': np.float64(0.0003396537781090329), 'un_batch_to_individual_items': np.float64(1.6117453658855228e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.891908836032144e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 9450
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.306210988012026, 'restore_env_runners': 7.54380103899166e-06, 'training_step': 3.2644439419938136, 'env_runner_sampling_timer': 0.03831848194182384, 'replay_buffer_add_data_timer': 0.039934405583044286, 'replay_buffer_sampling_timer': 1.6777774092862092, 'learner_update_timer': 1.3816383453749586, 'replay_buffer_update_prios_timer': 0.07544495405789349, 'synch_weights': 0.012869781392510049, 'synch_env_connectors': 0.011982983007328585}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(3.178310735405178e-05), 'get_actions': np.float64(0.0003468772051300102), 'un_batch_to_individual_items': np.float64(1.71855316686165e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9679104557420574e-06), 'normalize_and_clip_actions': np

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 9600
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.066198954998981, 'restore_env_runners': 7.938304042909294e-06, 'training_step': 3.0393338485200365, 'env_runner_sampling_timer': 0.039681653271836696, 'replay_buffer_add_data_timer': 0.03925043605413521, 'replay_buffer_sampling_timer': 0.47275632334771217, 'learner_update_timer': 2.3615593175355754, 'replay_buffer_update_prios_timer': 0.07030389397870748, 'synch_weights': 0.015158206306368811, 'synch_env_connectors': 0.01206191499659326}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8252431908152967e-05), 'get_actions': np.float64(0.0003426087450238303), 'un_batch_to_individual_items': np.float64(1.610905854744182e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.8959393783315361e-06), 'normalize_and_clip_actions':

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 9750
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.019707040002686, 'restore_env_runners': 1.5148509555729105e-05, 'training_step': 3.02906828966341, 'env_runner_sampling_timer': 0.03906723303589388, 'replay_buffer_add_data_timer': 0.04797133354601101, 'replay_buffer_sampling_timer': 1.6674318491964368, 'learner_update_timer': 1.1434328605282644, 'replay_buffer_update_prios_timer': 0.07479339678087854, 'synch_weights': 0.01517523700473248, 'synch_env_connectors': 0.012321812988375314}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.9944494432107334e-05), 'get_actions': np.float64(0.000352776322789541), 'un_batch_to_individual_items': np.float64(2.2961091015593878e-05), 'remove_single_ts_time_rank_from_batch': np.float64(2.011675254925044e-06), 'normalize_and_clip_actions': np.

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

Iteration 9900
TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.294592641002964, 'restore_env_runners': 8.013252809178085e-06, 'training_step': 3.257280716551904, 'env_runner_sampling_timer': 0.03701593005709583, 'replay_buffer_add_data_timer': 0.03434566063340753, 'replay_buffer_sampling_timer': 1.6571075991910766, 'learner_update_timer': 1.403912250206049, 'replay_buffer_update_prios_timer': 0.07093397805525456, 'synch_weights': 0.013403083981538657, 'synch_env_connectors': 0.012500925004133023}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.8667573208355308e-05), 'get_actions': np.float64(0.0003424073602108847), 'un_batch_to_individual_items': np.float64(1.6517435545959946e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9278080969614486e-06), 'normalize_and_clip_actions': n

  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np.nanmean(self._values_to_merge)
  return np

TrainingResult(checkpoint=Checkpoint(filesystem=local, path=/home/bong/ray_results/SAC_horcrux_env-plane-v0_2026-01-16_16-44-25hskugxr4), metrics={'timers': {'training_iteration': 6.0052714129997185, 'restore_env_runners': 7.72174826124683e-06, 'training_step': 2.9947632345350574, 'env_runner_sampling_timer': 0.033684803084906886, 'replay_buffer_add_data_timer': 0.03699217003508238, 'replay_buffer_sampling_timer': 1.6643565236710127, 'learner_update_timer': 1.1343045873515076, 'replay_buffer_update_prios_timer': 0.07392535923077959, 'synch_weights': 0.013334114992030664, 'synch_env_connectors': 0.011980711002252065}, 'env_runners': {'module_to_env_connector': {'timers': {'connectors': {'listify_data_for_vector_env': np.float64(2.7998036472508182e-05), 'get_actions': np.float64(0.00034049371116404907), 'un_batch_to_individual_items': np.float64(1.597802138160309e-05), 'remove_single_ts_time_rank_from_batch': np.float64(1.9597229561406946e-06), 'normalize_and_clip_actions': np.float64(6.

  return np.nanmean(self._values_to_merge)


# 5. 학습 결과 평가

In [8]:
from ray.rllib.connectors.env_to_module import EnvToModulePipeline
from ray.rllib.connectors.module_to_env import ModuleToEnvPipeline

# 학습된 정책으로 평가 (새 API 스택 사용)
NUM_EVAL_EPISODES = 10
eval_results = []

# RLModule과 Connector 파이프라인 가져오기
rl_module = algo.get_module()

# EnvToModule과 ModuleToEnv 파이프라인 가져오기
# algo 객체에서 직접 가져올 수 있는 경우
try:
    # env_runner에서 connector 가져오기
    env_runner = algo.env_runner
    env_to_module = env_runner._connectors[0] if hasattr(env_runner, '_connectors') else None
    module_to_env = env_runner._connectors[1] if hasattr(env_runner, '_connectors') and len(env_runner._connectors) > 1 else None
except:
    env_to_module = None
    module_to_env = None

# 파이프라인을 직접 생성해야 하는 경우
if env_to_module is None or module_to_env is None:
    # 간단한 방법: RLModule만 사용하고 직접 처리
    from ray.rllib.core.columns import Columns
    
    env = gym.make(ENV_NAME, render_mode=None)
    device = getattr(rl_module, "device", next(rl_module.parameters()).device)
    
    for episode in range(NUM_EVAL_EPISODES):
        obs, info = env.reset()
        episode_reward = 0
        episode_length = 0
        done = False
        
        # Episode 객체 생성
        episode_obj = SingleAgentEpisode(
            observations=[obs],
            observation_space=env.observation_space,
            action_space=env.action_space,
        )
        
        # Option 1
        while not done:
            # 관찰을 텐서로 변환
            obs_batch = np.expand_dims(obs, 0).astype(np.float32)
            obs_tensor = torch.from_numpy(obs_batch).to(device)
            
            # forward_inference 사용 (평가 시에는 탐색 없음)
            rl_module_out = rl_module.forward_inference({"obs": obs_tensor})
            
            # ModuleToEnv 파이프라인이 없으므로 직접 처리
            # action_dist_inputs에서 액션 추출
            if Columns.ACTION_DIST_INPUTS in rl_module_out:
                action_dist_inputs = rl_module_out[Columns.ACTION_DIST_INPUTS]
            elif "action_dist_inputs" in rl_module_out:
                action_dist_inputs = rl_module_out["action_dist_inputs"]
            else:
                # 다른 키 확인
                action_dist_inputs = list(rl_module_out.values())[0]
            
            # action_dist_inputs 처리 (mean만 사용하거나 분포에서 샘플링)
            if isinstance(action_dist_inputs, torch.Tensor):
                dist_params = action_dist_inputs[0] if len(action_dist_inputs.shape) > 1 else action_dist_inputs
                
                if len(dist_params.shape) == 0 or dist_params.shape[0] == 14:
                    # mean만 있는 경우 (deterministic)
                    action_tensor = dist_params[:14] if len(dist_params) >= 14 else dist_params
                elif dist_params.shape[0] == 28:
                    # mean(14) + log_std(14) 구조
                    mean = dist_params[:14]
                    log_std = dist_params[14:]
                    std = torch.exp(log_std.clamp(-20, 2))
                    from torch.distributions import Normal
                    normal_dist = Normal(mean, std)
                    action_tensor = normal_dist.sample()
                else:
                    action_tensor = dist_params
                
                # 액션 공간에 맞게 클리핑
                action_low = torch.tensor(env.action_space.low, device=device, dtype=action_tensor.dtype)
                action_high = torch.tensor(env.action_space.high, device=device, dtype=action_tensor.dtype)
                
                
                bounded_action = torch.tanh(action_tensor[:14])
                # print(f'over:{(bounded_action[0:14] > 1).sum()}, below:{(bounded_action[0:14] < -1).sum()}, total:{(bounded_action[0:14] > 1).sum() + (bounded_action[0:14] < -1).sum()}')
                
                real_action = action_low + (0.5 * (bounded_action + 1.0) * (action_high - action_low))
                # print(real_action)

                action = real_action.detach().cpu().numpy()
            else:
                action = np.array(action_dist_inputs)
                if len(action.shape) > 1:
                    action = action[0]
                action = np.clip(action, env.action_space.low, env.action_space.high)
            
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            episode_reward += reward
            episode_length += 1
            
            if episode_length > 6000:
                done = True

            # Episode 업데이트
            episode_obj.add_env_step(
                obs,
                action,
                reward,
                terminated=terminated,
                truncated=truncated,
            )
        
        # Option 2
        # while not done:
        #     input_dict = env_to_module(
        #         episodes=[episode_obj],
        #         rl_module=rl_module,
        #         explore=False,
        #         shared_data={},
        #     )

        #     rl_module_out = rl_module.forward_inference(input_dict)

        #     to_env = module_to_env(
        #         batch=rl_module_out,
        #         episodes=[episode_obj],
        #         rl_module=rl_module,
        #         explore=False,
        #         shared_data={},
        #     )

        #     action = to_env.pop(Columns.ACTIONS)[0]
            
        #     obs, reward, terminated, truncated, info = env.step(action)
        #     done = terminated or truncated
        #     episode_reward += reward
        #     episode_length += 1
            
        #     # Episode 업데이트
        #     episode_obj.add_env_step(
        #         obs,
        #         action,
        #         reward,
        #         terminated=terminated,
        #         truncated=truncated,
        #     )
            

        print(episode_obj.get_return())
        eval_results.append({
            'episode': episode + 1,
            'reward': episode_reward,
            'length': episode_length,
        })
        
        if episode_obj.is_done:
            obs, info = env.reset()
            episode_obj = SingleAgentEpisode(
                observations=[obs],
                observation_space=env.observation_space,
                action_space=env.action_space,
            )
    
    env.close()
else:
    # 파이프라인이 있는 경우 예제 방식 사용
    env = gym.make(ENV_NAME, render_mode=None)
    
    obs, _ = env.reset()
    episode_obj = SingleAgentEpisode(
        observations=[obs],
        observation_space=env.observation_space,
        action_space=env.action_space,
    )
    
    num_episodes = 0
    while num_episodes < NUM_EVAL_EPISODES:
        shared_data = {}
        input_dict = env_to_module(
            episodes=[episode_obj],
            rl_module=rl_module,
            explore=False,
            shared_data=shared_data,
        )
        
        rl_module_out = rl_module.forward_inference(input_dict)
        
        to_env = module_to_env(
            batch=rl_module_out,
            episodes=[episode_obj],
            rl_module=rl_module,
            explore=False,
            shared_data=shared_data,
        )
        
        action = to_env.pop(Columns.ACTIONS)[0]
        obs, reward, terminated, truncated, _ = env.step(action)
        
        episode_obj.add_env_step(
            obs,
            action,
            reward,
            terminated=terminated,
            truncated=truncated,
            extra_model_outputs={k: v[0] for k, v in to_env.items()},
        )
        
        if episode_obj.is_done:
            eval_results.append({
                'episode': num_episodes + 1,
                'reward': episode_obj.get_return(),
                'length': len(episode_obj),
            })
            obs, info = env.reset()
            episode_obj = SingleAgentEpisode(
                observations=[obs],
                observation_space=env.observation_space,
                action_space=env.action_space,
            )
            num_episodes += 1
    
    env.close()


eval_df = pd.DataFrame(eval_results)
print("\n평가 결과:")
print(eval_df)
print(f"\n평균 보상: {eval_df['reward'].mean():.2f} ± {eval_df['reward'].std():.2f}")
print(f"평균 에피소드 길이: {eval_df['length'].mean():.2f} ± {eval_df['length'].std():.2f}")

# # 평가 결과 저장
# eval_df.to_csv(RESULTS_DIR / "evaluation_results.csv", index=False)

-3276.4039427115335
-9850.072188104015
-15673.59652114871
-300.93567270228556
-1116.6027071451263
-1227.2568665667268
-3745.1554650030926
-5398.887282483202
-3338.757733269208
-2879.1584435942264

평가 결과:
   episode        reward  length
0        1  -3276.403943     249
1        2  -9850.072188    1073
2        3 -15673.596521    1347
3        4   -300.935673      74
4        5  -1116.602707     187
5        6  -1227.256867     402
6        7  -3745.155465     282
7        8  -5398.887282     470
8        9  -3338.757733     291
9       10  -2879.158444     291

평균 보상: -4680.68 ± 4711.53
평균 에피소드 길이: 466.60 ± 411.29


## 7. 모델 저장 및 로드

In [None]:
algo.save(algo.logdir)

In [None]:
# 모델 저장
checkpoint_dir = algo.save()
print(f"모델 저장 완료: {checkpoint_dir}")

# 모델 로드 예시 (주석 처리)
# loaded_algo = Algorithm.from_checkpoint(checkpoint_dir)

## 8. 정리

In [None]:
# 알고리즘 정리
algo.stop()

# Ray 종료 (필요한 경우)
# ray.shutdown()

print("정리 완료")