In [1]:
# import sys
# sys.path.append(r"C:\Users\user\anaconda3\lib\site-packages\matlabengine-24.2-py3.12.egg")

# import matlab.engine
# eng = matlab.engine.start_matlab()
# print("MATLAB 연결 성공!")


In [2]:
# !pip install stable-baselines3[extra]

In [3]:
import matlab.engine
import numpy as np
from gymnasium import Env
from gymnasium.spaces import Box
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
import math

# 1. Define the reinforcement-learning environment
class PIDEnv(Env):
    """One-step Gymnasium environment that scores PID gains via MATLAB.

    Every episode is a single step: the action is a ``(Kp, Ki, Kd)`` triple,
    the reward is the value returned by the MATLAB function ``run_uav_sim``
    for those gains, and the episode terminates immediately. The observation
    is a constant ``[0.0]`` placeholder.
    """

    # Folder MATLAB switches to at start-up. run_uav_sim.m and
    # state_derivative.m must both live there; adjust the path as needed.
    DEFAULT_SIM_DIR = r'C:\Users\user\Desktop\rkdghkgkrtmq\PIDQN'

    # Reward substituted when the simulation errors out or returns NaN/inf.
    FAILURE_PENALTY = -1e6

    def __init__(self, sim_dir=DEFAULT_SIM_DIR, failure_penalty=FAILURE_PENALTY):
        """Start a MATLAB engine and point it at the simulation folder.

        Args:
            sim_dir: Directory the MATLAB session changes into.
            failure_penalty: Reward used when the simulation fails or yields
                a non-finite value.

        Raises:
            Exception: Whatever the MATLAB engine raises while starting or
                changing directory; the engine is shut down before the error
                propagates so no MATLAB process is left behind.
        """
        super().__init__()
        # Action = (Kp, Ki, Kd), each bounded to its own range.
        self.action_space = Box(low=np.array([0.1, 0.01, 0.01], dtype=np.float32),
                                high=np.array([5.0, 2.0, 2.0], dtype=np.float32),
                                dtype=np.float32)
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32)
        self.state = np.array([0.0], dtype=np.float32)
        self.failure_penalty = float(failure_penalty)

        # Defined up front so close() is safe even if start-up fails below.
        self.eng = None

        print("🔄 MATLAB 엔진 시작 중...")
        self.eng = matlab.engine.start_matlab()
        print("✅ MATLAB 엔진 시작 완료")

        try:
            self.eng.cd(sim_dir)
        except Exception:
            # Do not leak the freshly started MATLAB process on a bad path.
            self.close()
            raise

    def step(self, action):
        """Run one simulation with the given gains and end the episode.

        Args:
            action: Indexable of at least three numbers ``(Kp, Ki, Kd)``.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``terminated`` is always ``True``, ``truncated`` is always
            ``False`` and ``info`` is an empty dict.
        """
        Kp, Ki, Kd = float(action[0]), float(action[1]), float(action[2])
        try:
            raw_reward = self.eng.run_uav_sim(Kp, Ki, Kd)
        except matlab.engine.MatlabExecutionError as e:
            print(f"[MATLAB 오류 발생]: {e}")
            # Heavy penalty so the agent steers away from failing gains.
            reward = self.failure_penalty
        else:
            reward = float(raw_reward)
            if not math.isfinite(reward):
                print(f"[❌ NaN 보상] Kp={Kp:.3f}, Ki={Ki:.3f}, Kd={Kd:.3f}")
                reward = self.failure_penalty

        terminated = True
        truncated = False
        return self.state, reward, terminated, truncated, {}

    def reset(self, *, seed=None, options=None):
        """Reset to the constant placeholder observation.

        Args:
            seed: Forwarded to ``Env.reset`` to seed the environment RNG.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(observation, info)`` with an empty ``info`` dict.
        """
        super().reset(seed=seed)
        self.state = np.array([0.0], dtype=np.float32)
        return self.state, {}

    def render(self, mode='human'):
        """No-op: this environment has nothing to draw."""
        pass

    def close(self):
        """Shut down the MATLAB engine; safe to call more than once."""
        eng = getattr(self, "eng", None)
        if eng is None:
            return
        # Drop the reference first so a repeated close() becomes a no-op
        # even if quit() itself raises.
        self.eng = None
        eng.quit()

# 2. Create the environment and sanity-check it with check_env
env = PIDEnv()
check_env(env, warn=True)

# 3. Build the PPO agent and train it
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=3000)

# 4. Ask the trained policy for its PID gains
# NOTE(review): env.close() is not called in the visible code, so the MATLAB
# engine stays alive after this point — confirm it is closed later.
obs, _ = env.reset()
# predict() returns (action, state); only the action is needed here.
best_gains = model.predict(obs, deterministic=True)[0]
Kp, Ki, Kd = best_gains
print(f"🔧 최적 튜닝 결과: Kp = {Kp:.3f}, Ki = {Ki:.3f}, Kd = {Kd:.3f}")





🔄 MATLAB 엔진 시작 중...
✅ MATLAB 엔진 시작 완료




Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0232  |
| time/              |          |
|    fps             | 164      |
|    iterations      | 1        |
|    time_elapsed    | 12       |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1          |
|    ep_rew_mean          | -0.0158    |
| time/                   |            |
|    fps                  | 160        |
|    iterations           | 2          |
|    time_elapsed         | 25         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.04043656 |
|    clip_fraction        | 0.37       |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.28