# 학습된 에이전트 시각화 진행을 위한 노트북

### 필요 패키지 Import

In [1]:
import gymnasium as gym
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

# 조이스틱 환경 삽입
from horcrux_terrain_v1.envs import PlaneJoyWorld
from horcrux_terrain_v1.envs import PlaneWorld

# Ray 패키지 삽입
import ray
from ray.rllib.algorithms.algorithm import Algorithm
from ray.rllib.algorithms.sac import SACConfig

from ray.tune.registry import register_env

  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


### Ray 실행

In [2]:
import socket
import psutil

# Pick the address the Ray dashboard binds to.
# Default to loopback so a well-defined host is passed even when no OpenVPN TAP
# adapter exists on this machine (previously an empty string was passed).
conn_ip = "127.0.0.1"
interfaces = psutil.net_if_addrs()
for interface_name, addresses in interfaces.items():
    lowered_name = interface_name.lower()
    if "openvpn" in lowered_name and "tap" in lowered_name:
        # `addresses` already holds this adapter's entries; no second lookup needed.
        for address in addresses:
            if address.family == socket.AF_INET:
                # The last matching IPv4 address wins, as in the original loop.
                conn_ip = address.address

# Binding the dashboard to the VPN address makes it reachable from outside over the VPN.
# ignore_reinit_error lets this cell be re-run without raising when Ray is already up.
ray.init(dashboard_host=conn_ip, dashboard_port=8265, ignore_reinit_error=True)

2025-02-27 15:13:45,655	INFO worker.py:1777 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


0,1
Python version:,3.12.5
Ray version:,2.36.1
Dashboard:,http://127.0.0.1:8265


### Gym 환경 등록하기

In [12]:
# Keyword arguments shared by the environment constructors registered in this notebook.
# Written in keyword form; keys, order and values match the environment constructor
# parameter names used when registering the environments.
env_config = dict(
    forward_reward_weight=6.5,
    side_cost_weight=2.0,
    unhealthy_max_steps=100,
    healthy_reward=0.5,
    healthy_roll_range=(-35, 35),
    terminating_roll_range=(-85, 85),
    rotation_norm_cost_weight=0.01,
    rotation_orientation_cost_weight=1.2,
    termination_reward=0,
    gait_params=(30, 30, 60, 60, 0),
    use_friction_chg=True,
    # joy_input_random=True,
)

# JoyWorld
# Keys that must be present in env_config; a missing one still raises KeyError.
_JOY_ENV_REQUIRED_KEYS = (
    "forward_reward_weight",
    "side_cost_weight",
    "unhealthy_max_steps",
    "healthy_reward",
    "healthy_roll_range",
    "terminating_roll_range",
    "rotation_norm_cost_weight",
    "rotation_orientation_cost_weight",
    "termination_reward",
    "gait_params",
    "use_friction_chg",
)


def _make_joy_env(config):
    """Create a PlaneJoyWorld from the notebook-level ``env_config``.

    Args:
        config: Argument the registered creator is called with. It is not used;
            the environment is configured from ``env_config`` only.

    Returns:
        A new ``PlaneJoyWorld`` instance.

    Raises:
        KeyError: If any key in ``_JOY_ENV_REQUIRED_KEYS`` is missing from ``env_config``.
    """
    kwargs = {key: env_config[key] for key in _JOY_ENV_REQUIRED_KEYS}
    # "joy_input_random" is optional in env_config. Indexing it unconditionally
    # raised KeyError whenever the key was absent, so forward it only when set and
    # otherwise let the environment use its own default.
    if "joy_input_random" in env_config:
        kwargs["joy_input_random"] = env_config["joy_input_random"]
    return PlaneJoyWorld(**kwargs)


register_env("joy-v1", _make_joy_env)

# Plane
# Names of the env_config entries forwarded to PlaneWorld as keyword arguments.
_PLANE_ENV_KEYS = (
    "forward_reward_weight",
    "side_cost_weight",
    "unhealthy_max_steps",
    "healthy_reward",
    "healthy_roll_range",
    "terminating_roll_range",
    "rotation_norm_cost_weight",
    "rotation_orientation_cost_weight",
    "termination_reward",
    "use_friction_chg",
    "gait_params",
)

# The creator ignores its `config` argument and reads env_config when it is called.
register_env(
    "plane-v1",
    lambda config: PlaneWorld(**{key: env_config[key] for key in _PLANE_ENV_KEYS}),
)

### 학습 알고리즘 불러오기

In [6]:
# Checkpoint path of the agent to visualise, relative to this notebook's working directory.
CHECKPOINT_PATH = '../learning/Paper_agents/good/Linear/SAC_layer_512_5_32_Linear_restart_final'

# Restore the algorithm object from the checkpoint.
algo = Algorithm.from_checkpoint(CHECKPOINT_PATH)

`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
[36m(pid=45776)[0m   "cipher": algorithms.TripleDES,
[36m(pid=45776)[0m   "class": algorithms.Blowfish,
[36m(pid=45776)[0m   "class": algorithms.TripleDES,
2025-02-27 15:14:58,648	INFO trainable.py:161 -- Trainable.setup took 13.699 seconds. If your trainable is slow to initialize, consider setti

### 평가용 Env 생성

In [13]:
import pathlib
import torch
import numpy as np
import gymnasium as gym
from horcrux_terrain_v1.envs import SandWorld
from horcrux_terrain_v1.envs import PlaneWorld
from ray.rllib.algorithms.algorithm import Algorithm
import time

# Use a shallow copy so evaluation-only overrides do not leak into env_config,
# which the registered environment creators read. A shallow copy is enough
# because every value in env_config is a scalar or a tuple.
eval_config = dict(env_config)
# Optional evaluation overrides (uncomment as needed):
# eval_config["use_friction_chg"] = False
# eval_config["joy_input_random"] = False
# eval_config["joy_input"] = (1, 0, 0)

In [27]:
# Roll out the restored policy in a rendered environment and record every step's info dict.
N_EPISODES = 2                 # number of evaluation episodes
MAX_STEPS_PER_EPISODE = 1000   # upper bound on steps per episode

env = gym.make("horcrux_terrain_v1/plane-v1",
               terminate_when_unhealthy=False,
               render_mode="human",
               # render_camera_name='ceiling',
               use_gait=True,
               **eval_config,
               )

# One info dict per environment step, accumulated across all episodes.
info_stack = []

try:
    for j in range(N_EPISODES):
        episode_return = 0
        yaw_total = 0
        terminated = truncated = False

        obs, info = env.reset()

        for i in range(MAX_STEPS_PER_EPISODE):
            # NOTE(review): no `explore` argument is passed, so the algorithm's own
            # exploration setting applies — confirm this is intended for evaluation.
            action = algo.compute_single_action(observation=obs)

            obs, reward, terminated, truncated, info = env.step(action)
            info_stack.append(info)

            episode_return += reward
            # NOTE(review): 0.1 is presumably the control time step, turning the
            # per-step yaw change into a rate — confirm. yaw_total is not used below.
            yaw_total += info['step_ypr'][0] / 0.1

            if terminated:
                print("terminated")
            # The environment must be reset before stepping again once an episode
            # has ended, so stop this episode instead of stepping past the end.
            if terminated or truncated:
                break

        print(f"Reached episode return of {episode_return}.")
finally:
    # Always release the viewer window, even if the rollout raises or is interrupted.
    env.close()

c:\Users\doore\anaconda3\envs\gdtor\Lib\site-packages\glfw\__init__.py:914: GLFWError: (65537) b'The GLFW library is not initialized'
Exception ignored in: <function WindowViewer.__del__ at 0x00000202B4D9C7C0>
Traceback (most recent call last):
  File "c:\Users\doore\anaconda3\envs\gdtor\Lib\site-packages\gymnasium\envs\mujoco\mujoco_rendering.py", line 335, in __del__
    self.free()
  File "c:\Users\doore\anaconda3\envs\gdtor\Lib\site-packages\gymnasium\envs\mujoco\mujoco_rendering.py", line 330, in free
    glfw.destroy_window(self.window)
  File "c:\Users\doore\anaconda3\envs\gdtor\Lib\site-packages\glfw\__init__.py", line 1279, in destroy_window
    window_addr = ctypes.cast(ctypes.pointer(window),
                              ^^^^^^^^^^^^^^^^^^^^^^
TypeError: _type_ must have storage info


Reached episode return of 4027.699481765632.
Reached episode return of 4154.2043499703095.


### 데이터 플롯

In [25]:
import scipy.io

# Collect the per-step signals from the recorded info dicts in a single pass.
x_vel, y_vel, yaw_vel = [], [], []
for step_info in info_stack:
    x_vel.append(step_info["x_velocity"])
    y_vel.append(step_info["y_velocity"])
    # NOTE(review): this stores the raw first element of "step_ypr"; despite the
    # name it is not divided by a time step here — confirm the intended unit.
    yaw_vel.append(step_info["step_ypr"][0])

# Write the three series to a MATLAB .mat file in the current working directory.
sim_data = {'x_vel': x_vel, 'y_vel': y_vel, 'yaw_vel': yaw_vel}
scipy.io.savemat('sim_data.mat', sim_data)

In [None]:

# Plot the recorded x, y and yaw series against the step index.
indices = np.arange(len(x_vel))

fig, ax = plt.subplots()
# Each line carries its real series name. The old 'Vector a/b/c' labels were dead
# because the legend call replaced them with ["x", "y", "yaw"].
ax.plot(indices, x_vel, label='x', marker='o')
ax.plot(indices, y_vel, label='y', marker='o')
ax.plot(indices, yaw_vel, label='yaw', marker='o')

# Add axis labels and title
ax.set_xlabel('Step')
ax.set_ylabel('Value')
ax.set_title('Three Column Vectors')
ax.legend()
ax.grid(True)

# Show the plot
plt.show()