In [1]:
# import sys
# sys.path.append(r"C:\Users\user\anaconda3\lib\site-packages\matlabengine-24.2-py3.12.egg")

# import matlab.engine
# eng = matlab.engine.start_matlab()
# print("MATLAB 연결 성공!")


In [2]:
!pip install "stable-baselines3[extra]" gymnasium



DEPRECATION: Loading egg at c:\users\user\anaconda3\lib\site-packages\matlabengine-24.2-py3.12.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330


In [3]:
import numpy as np
from gymnasium import Env
from gymnasium.spaces import Box
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
import math

# --- 시뮬레이터 함수 (run_uav_sim.m 포팅 버전) ---
def run_uav_sim(Kp, Ki, Kd, dt=0.01, tf=10.0):
    n = int(tf / dt)
    m, g = 13.5, 9.81
    Ix, Iy, Iz, Ixz = 0.8244, 1.135, 1.759, 0.1204
    S, c, rho, S_p, C_p, k_m = 0.55, 0.18994, 1.2682, 0.2027, 1.0, 80
    deg2rad = np.pi / 180
    the_d = 3 * deg2rad
    limit_e = 20 * deg2rad
    noise_std = 0.1 * deg2rad

    state = np.zeros(12)
    state[2] = -100
    state[4] = 3 * deg2rad
    state[6] = 20
    state[8] = 0.1
    state[10] = 0.01

    err_int = 0
    err_prev = 0
    EA_log = []

    for _ in range(n):
        theta = state[4]
        u, w = state[6], state[8]
        q = state[10]
        err = -the_d + theta
        err_int += err * dt
        err_dev = (err - err_prev) / dt
        err_prev = err

        delta_e = np.clip(Kp * err + Ki * err_int + Kd * err_dev, -limit_e, limit_e)
        V = np.linalg.norm([u, w])
        if V < 0.1 or np.isnan(V) or np.isnan(err_int):
            return -1e6

        alpha = np.arctan2(w, u)
        q_bar = 0.5 * rho * V ** 2
        CL = 0.28 + 3.45 * alpha + 0.36 * delta_e
        CD = 0.03 + 0.3 * abs(alpha)
        Cm = -0.02338 - 0.38 * alpha - 3.6 * (c / (2 * V)) * q - 0.5 * delta_e

        Fx = -q_bar * S * CD
        Fz = -q_bar * S * CL
        F_g = np.array([-m * g * np.sin(theta), m * g * np.cos(theta), 0])
        F_a = np.array([Fx, 0, Fz])
        F_p = np.array([0.5 * rho * S_p * C_p * ((k_m * 0.4)**2 - V**2), 0, 0])

        Cw2b = np.array([[np.cos(alpha), 0, np.sin(alpha)],
                         [0, 1, 0],
                         [-np.sin(alpha), 0, np.cos(alpha)]])
        F_tot = Cw2b @ F_a + F_p + F_g
        M = q_bar * S * c * Cm
        M_a = np.array([0, M, 0])

        state = rk4_step(state, F_tot, M_a, dt, m, Ix, Iy, Iz, Ixz)
        state[10] += noise_std * np.random.randn()
        EA_log.append(state[4])

    e = the_d - np.array(EA_log)
    ISE = np.sum(e ** 2) * dt
    return -ISE


def rk4_step(state, F, M, dt, m, Ix, Iy, Iz, Ixz):
    def dynamics(s):
        return state_derivative(s, F, M, m, Ix, Iy, Iz, Ixz)
    k1 = dynamics(state)
    k2 = dynamics(state + 0.5 * dt * k1)
    k3 = dynamics(state + 0.5 * dt * k2)
    k4 = dynamics(state + dt * k3)
    return state + (dt / 6) * (k1 + 2 * k2 + 2 * k3 + k4)


def state_derivative(state, F, M, m, Ix, Iy, Iz, Ixz):
    phi, the, psi = state[3], state[4], state[5]
    u, v, w = state[6:9]
    p, q, r = state[9:12]

    s_phi, c_phi = np.sin(phi), np.cos(phi)
    s_the, c_the = np.sin(the), np.cos(the)
    s_psi, c_psi = np.sin(psi), np.cos(psi)

    udot = - (q * w - r * v) + F[0] / m
    vdot = - (r * u - p * w) + F[1] / m
    wdot = - (p * v - q * u) + F[2] / m

    G5 = (Iz - Ix) / Iy
    G6 = Ixz / Iy
    pdot = 0
    qdot = G5 * p * r - G6 * (p**2 - r**2) + M[1] / Iy
    rdot = 0

    Cb2n = np.array([
        [c_psi * c_the, c_psi * s_the * s_phi - s_psi * c_phi, c_psi * s_the * c_phi + s_psi * s_phi],
        [s_psi * c_the, s_psi * s_the * s_phi + c_psi * c_phi, s_psi * s_the * c_phi - c_psi * s_phi],
        [-s_the, c_the * s_phi, c_the * c_phi]
    ])
    Pos_dot = Cb2n @ np.array([u, v, w])
    phidot = 0
    thedot = c_phi * q - s_phi * r
    psidot = 0

    return np.concatenate([Pos_dot, [phidot, thedot, psidot, udot, vdot, wdot, pdot, qdot, rdot]])

# --- 강화학습 환경 정의 ---
class PIDEnv(Env):
    def __init__(self):
        super().__init__()
        self.action_space = Box(low=np.array([0.1, 0.01, 0.01], dtype=np.float32),
                                high=np.array([5.0, 2.0, 2.0], dtype=np.float32),
                                dtype=np.float32)
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32)
        self.state = np.array([0.0], dtype=np.float32)

    def step(self, action):
        Kp, Ki, Kd = float(action[0]), float(action[1]), float(action[2])
        try:
            reward = run_uav_sim(Kp, Ki, Kd)
            if math.isnan(reward) or math.isinf(reward):
                print(f"[❌ NaN 보상] Kp={Kp:.3f}, Ki={Ki:.3f}, Kd={Kd:.3f}")
                reward = -1e6
        except Exception as e:
            print(f"[Python 시뮬 오류 발생]: {e}")
            reward = -1e6
        terminated = True
        truncated = False
        return self.state, reward, terminated, truncated, {}

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self.state = np.array([0.0], dtype=np.float32)
        return self.state, {}

    def render(self, mode='human'):
        pass

# --- 학습 및 결과 출력 ---
env = PIDEnv()
check_env(env, warn=True)

model = PPO("MlpPolicy", env, verbose=1, device="cpu")  # CPU 권장
model.learn(total_timesteps=30000)

obs, _ = env.reset()
Kp, Ki, Kd = model.predict(obs, deterministic=True)[0]
print(f"🔧 최적 튜닝 결과: Kp = {Kp:.3f}, Ki = {Ki:.3f}, Kd = {Kd:.3f}")





Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0222  |
| time/              |          |
|    fps             | 19       |
|    iterations      | 1        |
|    time_elapsed    | 105      |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1           |
|    ep_rew_mean          | -0.0131     |
| time/                   |             |
|    fps                  | 19          |
|    iterations           | 2           |
|    time_elapsed         | 213         |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.038057994 |
|    clip_fraction        | 0.364       |
|    clip_range           | 0.2         |
|    entropy_loss   