In [1]:
import os

os.environ['DISPLAY'] = ':1'

In [2]:
from sheeprl.envs.unity_env import UnityWrapper

In [3]:
import os

from typing import Any, Dict, Optional, SupportsFloat, Tuple, Union
import numpy as np
import gymnasium as gym
from gymnasium import spaces

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper


class UnityWrapper(UnityToGymWrapper):
    def __init__(self, file_name, env_num_id,max_steps=20000) -> None:
        print('initailizing!')
        os.environ['DISPLAY'] = ':1'
        print('env_num_id:', env_num_id)
        print('base_port:',5005+env_num_id)
        print('file name:',file_name)
        env = UnityEnvironment(file_name, worker_id=env_num_id)
        self.step_count = 0
        self.max_steps = max_steps

        super().__init__(env)

    # render_mode는 무조건 rgb이지만,
    @property
    def render_mode(self):
        return 'rgb_array'

    def _convert_obs(self, obs: Union[np.ndarray, Dict[str, np.ndarray]]) -> Dict[str, np.ndarray]:
        return {"rgb": obs}

    def step(self, action):
        obs, reward, terminated, info = super().step(action)
        self.step_count += 1
        print("self.step_count:",self.step_count)
        
        # 원래 환경에서 종료 신호가 오면 리셋
        if terminated:
            obs, _ = self.reset()
            terminated = False
        
        # 최대 스텝 수에 도달하면 truncated를 True로 설정
        truncated = self.step_count >= self.max_steps
        
        return obs, reward, terminated, truncated, info

    def reset(
        self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None
    ) -> Tuple[Any, Dict[str, Any]]:
        obs = super().reset()
        if obs.dtype == np.float32:
            obs = (obs * 255).astype(np.uint8)
        return obs, {}

    @property
    def observation_space(self):
        obs_space = super().observation_space
        low = np.transpose(obs_space.low, (1, 2, 0))
        high = np.transpose(obs_space.high, (1, 2, 0))
        return gym.spaces.Box(low=low, high=high, dtype=obs_space.dtype)

    def render(self):
        obs = super().render()
        if obs is not None:
            if obs.dtype == np.float32:
                obs = (obs * 255).astype(np.uint8)
            # (3, 84, 84)를 (84, 84, 3)으로 변환
            obs = np.transpose(obs, (1, 2, 0))
        return obs

In [4]:
import mlagents

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper
print('Now its loading.....')
env = UnityWrapper("/home/calm04/data/sheeprl/unity_env/240808_agent/240808_agent.x86_64",env_num_id=1,max_steps=20000)
print('load compelete!!')

Now its loading.....
initailizing!
env_num_id: 1
base_port: 5006
file name: /home/calm04/data/sheeprl/unity_env/240808_agent/240808_agent.x86_64
[UnityMemory] Configuration Parameters - Can be set up in boot.config
    "memorysetup-bucket-allocator-granularity=16"
    "memorysetup-bucket-allocator-bucket-count=8"
    "memorysetup-bucket-allocator-block-size=4194304"
    "memorysetup-bucket-allocator-block-count=1"
    "memorysetup-main-allocator-block-size=16777216"
    "memorysetup-thread-allocator-block-size=16777216"
    "memorysetup-gfx-main-allocator-block-size=16777216"
    "memorysetup-gfx-thread-allocator-block-size=16777216"
    "memorysetup-cache-allocator-block-size=4194304"
    "memorysetup-typetree-allocator-block-size=2097152"
    "memorysetup-profiler-bucket-allocator-granularity=16"
    "memorysetup-profiler-bucket-allocator-bucket-count=8"
    "memorysetup-profiler-bucket-allocator-block-size=4194304"
    "memorysetup-profiler-bucket-allocator-block-count=1"
    "memor

  unity_communicator_version = StrictVersion(unity_com_ver)
  api_version = StrictVersion(python_api_version)


load compelete!!


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [5]:
env.action_space

Box(-1.0, 1.0, (11,), float32)

In [6]:
env.observation_space

Box(0.0, 1.0, (84, 84, 3), float32)

In [7]:
env.reward_range

(-inf, inf)

In [8]:
from gymnasium.wrappers import RecordVideo
env = RecordVideo(env, video_folder="./videos2", episode_trigger=lambda e: True)

  logger.warn(


In [10]:
while True:
    action = env.action_space.sample()  # 또는 여러분의 정책에 따른 액션
    obs, reward, terminated, truncated, info = env.step(action)
    
    if truncated:
        print("최대 스텝 수에 도달했습니다. 환경을 종료합니다.")
        break
    
    if terminated:
        print("에피소드가 자연스럽게 종료되었지만, 내부적으로 리셋되어 계속 진행됩니다.")

env.close()
print('환경이 닫혔습니다.')

In [9]:
for i in range(1000):
    action = env.action_space.sample()  # 랜덤 액션 선택
    obs, reward, terminated, truncated, info = env.step(action)
    
    # terminated나 truncated가 True여도 무시하고 계속 진행
    if terminated or truncated:
        print(f'Episode would have ended at step {i}, but continuing...')
        # 환경을 리셋하지 않음
    
    # 추가적인 로깅이나 디버깅 정보를 여기에 넣을 수 있습니다
    print(f"Step {i}: Reward = {reward}")

env.close()
print('env closed!!!')

Step 0: Reward = 0.0
Step 1: Reward = 0.0
Step 2: Reward = 0.0
Step 3: Reward = 0.0
Step 4: Reward = 0.0
Step 5: Reward = 0.0
Step 6: Reward = 0.0
Step 7: Reward = 0.0
Step 8: Reward = 0.0
Step 9: Reward = 0.0
Step 10: Reward = 0.0
Step 11: Reward = 0.0
Step 12: Reward = 0.0
Step 13: Reward = 0.0
Step 14: Reward = 0.0
Step 15: Reward = 0.0
Step 16: Reward = 0.0
Step 17: Reward = 0.0
Step 18: Reward = 0.0
Step 19: Reward = 0.0
Step 20: Reward = 0.0
Step 21: Reward = 0.0
Step 22: Reward = 0.0
Step 23: Reward = 0.0
Step 24: Reward = 0.0
Step 25: Reward = 0.0
Step 26: Reward = 0.0
Step 27: Reward = 0.0
Step 28: Reward = 0.0
Step 29: Reward = 0.0
Step 30: Reward = 0.0
Step 31: Reward = 0.0
Step 32: Reward = 0.0
Step 33: Reward = 0.0
Step 34: Reward = 0.0
Step 35: Reward = 0.0
Step 36: Reward = 0.0
Step 37: Reward = 0.0
Step 38: Reward = 0.0
Step 39: Reward = 0.0
Step 40: Reward = 0.0
Step 41: Reward = 0.0
Step 42: Reward = 0.0
Step 43: Reward = 0.0
Step 44: Reward = 0.0
Step 45: Reward = 0.

KeyboardInterrupt: 