In [4]:
from tclab import TCLabModel
import tclab
import numpy as np
import time, csv
import matplotlib.pyplot as plt
from APMonitor.apm import apm_web        
from mpc_lib import mpc_init, mpc          
from pathlib import Path


class MPCDataCollector:
    """Run MPC-controlled episodes on a TCLab device and log the results.

    Each episode waits for both temperature sensors to drop below
    ``start_temp``, draws an independent random piecewise-constant set-point
    profile for each channel, runs the controller imported from ``mpc_lib``
    for ``steps`` samples, and writes one CSV file and one PNG plot.
    """

    def __init__(
        self,
        episodes: int = 100,
        total_time_sec: int = 1200,      # 20 minutes
        sample_interval: float = 5.0,    # seconds between samples
        start_temp: float = 29.0,        # cool-down threshold before an episode
        setpoint_range: tuple = (25.0, 65.0),
        data_dir: Path = Path('../data_back3/PID2MPC/MPC2')
    ):
        """Store the experiment settings and create the output directories.

        Args:
            episodes: Number of episodes executed by :meth:`run`.
            total_time_sec: Length of one episode in seconds.
            sample_interval: Control/sampling period in seconds (must be > 0).
            start_temp: Both sensors must read below this value before an
                episode starts.
            setpoint_range: ``(low, high)`` bounds for the random set-points.
            data_dir: Root output directory (``Path`` or ``str``); ``csv`` and
                ``png`` sub-directories are created beneath it.

        Raises:
            ValueError: If ``sample_interval`` is not positive or
                ``total_time_sec`` is negative.
        """
        if sample_interval <= 0:
            raise ValueError("sample_interval must be positive")
        if total_time_sec < 0:
            raise ValueError("total_time_sec must be non-negative")

        # Timing parameters
        self.total_time = total_time_sec
        self.dt         = sample_interval
        # The small epsilon keeps float round-off (e.g. 0.3 / 0.1 ==
        # 2.9999999999999996) from silently dropping the last step.
        self.steps      = int(total_time_sec / sample_interval + 1e-9)  # 240 with defaults

        # Experiment parameters
        self.episodes   = episodes
        self.start_temp = start_temp
        self.sp_low, self.sp_high = setpoint_range

        # Output directories (accept plain strings as well as Path objects)
        data_dir = Path(data_dir)
        self.csv_dir = data_dir / 'csv'
        self.png_dir = data_dir / 'png'
        self.csv_dir.mkdir(parents=True, exist_ok=True)
        self.png_dir.mkdir(parents=True, exist_ok=True)

        # (Optional) APMonitor server information
        self.apm_server = 'http://byu.apmonitor.com'
        self.apm_app    = 'my_MPC'

    # ---------- set-point profile ----------
    def generate_random_tsp(
        self,
        mean_sec: float = 480,
        std_sec: float = 100,
        min_sec: float = 160,
        max_sec: float = 800,
    ) -> np.ndarray:
        """Build a random piecewise-constant set-point profile.

        Segment durations are drawn from a normal distribution with mean
        ``mean_sec`` and standard deviation ``std_sec``, clipped to
        ``[min_sec, max_sec]`` and truncated to whole seconds. Each segment
        gets a level drawn uniformly from ``[sp_low, sp_high]`` and rounded to
        two decimals. Every segment is also printed to stdout.

        Returns:
            Array of length ``self.steps`` holding the set-point per sample.
        """
        tsp = np.zeros(self.steps)
        i = 0
        seg_id = 1  # running segment number, used only for the log
        print(f"\n--- Set-point 프로파일 생성 (총 시간: {self.total_time}초, 총 step: {self.steps}) ---")
        while i < self.steps:
            dur_sec = int(np.clip(np.random.normal(mean_sec, std_sec), min_sec, max_sec))
            # At least one step per segment so the loop always advances.
            dur_steps = max(1, int(dur_sec / self.dt))
            end = min(i + dur_steps, self.steps)

            # Set-point level for this segment
            temp = round(np.random.uniform(self.sp_low, self.sp_high), 2)
            tsp[i:end] = temp

            # Log the segment (inclusive step and time range)
            start_time = int(i * self.dt)
            end_time = int((end - 1) * self.dt)
            print(f"구간 {seg_id}: step {i:>3} ~ {end-1:>3} (시간 {start_time:>4}s ~ {end_time:>4}s) → 목표 온도: {temp:.2f}°C")

            i = end
            seg_id += 1
        print("-----------------------------------------------------------\n")
        return tsp

    # ---------- plot export ----------
    def save_plot(self, t, T1, Tsp1, T2, Tsp2, Q1, Q2, ep):
        """Save a two-panel PNG (temperatures on top, heater outputs below).

        The file is written to ``png_dir / f'mpc_episode_{ep}.png'``.
        """
        fig, axs = plt.subplots(2, 1, figsize=(10, 8))

        # Temperatures
        axs[0].plot(t, T1,  'b-', label='T1 (measured)')
        axs[0].plot(t, Tsp1,'k--',label='T1 (setpoint)')
        axs[0].plot(t, T2,  'r-', label='T2 (measured)')
        axs[0].plot(t, Tsp2,'k:', label='T2 (setpoint)')
        axs[0].set_ylabel('Temperature (°C)')
        axs[0].legend(); axs[0].grid()

        # Heaters
        axs[1].plot(t, Q1, 'b-', label='Q1')
        axs[1].plot(t, Q2, 'r--', label='Q2')
        axs[1].set_ylabel('Heater Output (%)')
        axs[1].set_xlabel('Time (s)')
        axs[1].legend(); axs[1].grid()

        fig.tight_layout()
        # Operate on this figure explicitly instead of pyplot's implicit
        # "current figure", and always release it afterwards.
        fig.savefig(self.png_dir / f'mpc_episode_{ep}.png')
        plt.close(fig)

    # ---------- single episode ----------
    def run_episode(self, arduino, ep):
        """Run one closed-loop episode on ``arduino`` and persist its data.

        Args:
            arduino: Connected lab object exposing ``T1``/``T2`` readings and
                ``Q1()``/``Q2()`` heater setters.
            ep: Episode number, used in log messages and output file names.
        """
        print(f"\n=== Episode {ep} start ===")

        # Cool down until both sensors are below the start temperature
        arduino.Q1(0); arduino.Q2(0)
        while arduino.T1 >= self.start_temp or arduino.T2 >= self.start_temp:
            print(f" Cooling... T1={arduino.T1:.1f}, T2={arduino.T2:.1f}")
            time.sleep(20)

        # Per-sample buffers
        t   = np.zeros(self.steps)
        T1  = np.zeros(self.steps); T2  = np.zeros(self.steps)
        Q1  = np.zeros(self.steps); Q2  = np.zeros(self.steps)
        Tsp1 = self.generate_random_tsp(); Tsp2 = self.generate_random_tsp()
        # Running sum of |set-point - measurement| over both channels, one
        # term per sample (not multiplied by the sample interval).
        iae  = 0.0

        # Console log roughly once per minute regardless of the sample interval
        log_every = max(1, int(round(60.0 / self.dt)))

        # CSV log
        csv_path = self.csv_dir / f'mpc_episode_{ep}_data.csv'
        try:
            with open(csv_path, 'w', newline='') as f:
                writer = csv.writer(f)
                writer.writerow(['Episode','Time','T1','T2','Q1','Q2','TSP1','TSP2','IAE'])

                mpc_init()

                for k in range(self.steps):
                    loop_start = time.time()
                    t[k] = k * self.dt                 # nominal time: 0, dt, 2*dt, ...

                    # Read sensors
                    T1[k] = round(arduino.T1, 2)
                    T2[k] = round(arduino.T2, 2)

                    # Accumulate absolute error
                    iae += abs(Tsp1[k] - T1[k]) + abs(Tsp2[k] - T2[k])

                    # MPC move
                    q1, q2 = mpc(T1[k], Tsp1[k], T2[k], Tsp2[k])
                    Q1[k] = round(q1, 2); Q2[k] = round(q2, 2)
                    arduino.Q1(Q1[k]); arduino.Q2(Q2[k])

                    # Periodic console log (and always the final step)
                    if k % log_every == 0 or k == self.steps - 1:
                        print(f" t={t[k]:4.0f}s | "
                              f"SP1={Tsp1[k]:5.2f}, PV1={T1[k]:5.2f}, Q1={Q1[k]:5.2f} | "
                              f"SP2={Tsp2[k]:5.2f}, PV2={T2[k]:5.2f}, Q2={Q2[k]:5.2f} | "
                              f"IAE={iae:7.2f}")

                    # CSV row
                    writer.writerow([
                        ep, f"{t[k]:.0f}",
                        f"{T1[k]:.2f}", f"{T2[k]:.2f}",
                        f"{Q1[k]:.2f}", f"{Q2[k]:.2f}",
                        f"{Tsp1[k]:.2f}", f"{Tsp2[k]:.2f}",
                        f"{iae:.2f}"
                    ])

                    # Sleep for whatever is left of this sample period
                    elapsed = time.time() - loop_start
                    time.sleep(max(0.0, self.dt - elapsed))
        finally:
            # Never leave the heaters at the last MPC output, whether the
            # episode finished normally or was interrupted.
            arduino.Q1(0); arduino.Q2(0)

        # Save the plot
        self.save_plot(t, T1, Tsp1, T2, Tsp2, Q1, Q2, ep)
        print(f"=== Episode {ep} done, data saved to {csv_path} ===")

    # ---------- full run ----------
    def run(self):
        """Connect to a TCLab device and run ``self.episodes`` episodes."""
        with tclab.TCLab() as arduino:
            print(arduino.version)
            arduino.LED(100)
            for ep in range(1, self.episodes + 1):
                self.run_episode(arduino, ep)
        print("All episodes finished.")



if __name__ == "__main__":
    # Smoke test: one 30-second episode sampled once per second, run against
    # the simulated lab model rather than a physical device.
    collector = MPCDataCollector(
        episodes=1,
        total_time_sec=30,
        sample_interval=1.0,
    )

    with TCLabModel(synced=True) as simlab:
        collector.run_episode(simlab, 1)

TCLab version 1.0.0
Simulated TCLab

=== Episode 1 start ===

--- Set-point 프로파일 생성 (총 시간: 30초, 총 step: 30) ---
구간 1: step   0 ~  29 (시간    0s ~   29s) → 목표 온도: 50.01°C
-----------------------------------------------------------


--- Set-point 프로파일 생성 (총 시간: 30초, 총 step: 30) ---
구간 1: step   0 ~  29 (시간    0s ~   29s) → 목표 온도: 36.90°C
-----------------------------------------------------------

apm 220.76.61.147_my_mpc <br><pre> ----------------------------------------------------------------
 APMonitor, Version 1.0.3
 APMonitor Optimization Suite
 ----------------------------------------------------------------
 
 
 --------- APM Model Size ------------
 Each time step contains
   Objects      :            0
   Constants    :            7
   Variables    :           10
   Intermediates:            5
   Connections  :            0
   Equations    :            9
   Residuals    :            4
 
 Number of state variables:            800
 Number of total equations: -          760
 Numbe

KeyboardInterrupt: 

In [1]:
# %%
# Policy evaluation notebook cell
#
# (1) Path configuration ────────────────────────────────────────────────
# MODEL_PATH: checkpoint file passed to torch.load further down in this cell.
# EVAL_LOG_ROOT: root directory handed to sim_evaluate_policy as log_root.
MODEL_PATH = r"C:\Users\Developer\TCLab\IQL\logs\mpc-iql\04-23-25_14.45.10_ukeq\final.pt"
EVAL_LOG_ROOT = r"C:\Users\Developer\TCLab\IQL\eval_sim_logs"

# ② 필요한 라이브러리 import ───────────────────────────────────────────
import os, time, csv, math, torch, numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

from tclab import setup                    # BYU TCLab 시뮬레이터
# util.py 에 이미 존재한다고 가정
from util import torchify, set_seed
from policy import DeterministicPolicy

# ③ sim_evaluate_policy 함수 정의 ─────────────────────────────────────
def random_tsp_profile(max_steps, low=25.0, high=65.0, period=5):
    """Return a piecewise-constant random set-point profile.

    The profile has ``max_steps`` samples. A new level is drawn uniformly
    from ``[low, high)`` every ``period`` samples and held until the next
    draw; the last segment is cut short when ``period`` does not divide
    ``max_steps``.
    """
    profile = np.zeros(max_steps)
    for seg_start in range(0, max_steps, period):
        level = np.random.uniform(low, high)
        # Slicing past the end of the array is clipped by NumPy.
        profile[slice(seg_start, seg_start + period)] = level
    return profile

def sim_evaluate_policy(
    policy,
    max_steps=1200,
    log_root="./eval_logs",
    seed=1,
    ambient=29.0,
    deterministic=True,
):
    """Roll out ``policy`` on the simulated lab and record tracking metrics.

    The rollout uses two independent random set-point profiles from
    ``random_tsp_profile``. At every step the observation
    ``[T1, T2, Tsp1, Tsp2]`` is fed to ``policy.act`` and the first two
    action components, clipped to ``[0, 100]``, are applied as heater
    outputs. Results are written to ``<log_root>/sim_seed<seed>/`` as
    ``rollout.csv`` and ``rollout.png``.

    Args:
        policy: Object providing ``eval()`` and
            ``act(obs, deterministic=...)``.
        max_steps: Number of simulation steps.
        log_root: Root directory for the output files.
        seed: Passed to ``set_seed`` and used in the run directory name.
        ambient: Forwarded to the simulator constructor.
        deterministic: Forwarded to ``policy.act``.

    Returns:
        dict with keys ``return_sum`` (sum of the per-step reward, which is
        the negative Euclidean norm of the two tracking errors), ``E1`` and
        ``E2`` (summed absolute error per channel), and ``Over`` / ``Under``
        (summed amount by which the measurements were above / below their
        set-points, both channels combined).
    """
    set_seed(seed)
    run_dir = Path(log_root) / f"sim_seed{seed}"
    run_dir.mkdir(parents=True, exist_ok=True)

    lab = setup(connected=False)
    # NOTE(review): confirm that the class returned by setup() accepts an
    # `ambient` keyword argument.
    env = lab(ambient=ambient, synced=False)
    env.Q1(0); env.Q2(0)

    Tsp1 = random_tsp_profile(max_steps)
    Tsp2 = random_tsp_profile(max_steps)

    t  = np.arange(max_steps)
    T1 = np.zeros(max_steps); T2 = np.zeros(max_steps)
    Q1 = np.zeros(max_steps); Q2 = np.zeros(max_steps)

    total_return = 0.0
    e1 = e2 = over = under = 0.0

    policy.eval()

    try:
        for k in range(max_steps):
            env.update(t=k)

            T1[k] = env.T1
            T2[k] = env.T2
            obs = np.array([T1[k], T2[k], Tsp1[k], Tsp2[k]], dtype=np.float32)

            with torch.no_grad():
                act = policy.act(torchify(obs), deterministic=deterministic).cpu().numpy()

            # Heater commands are percentages; clamp whatever the policy emits.
            Q1[k] = float(np.clip(act[0], 0, 100))
            Q2[k] = float(np.clip(act[1], 0, 100))
            env.Q1(Q1[k]); env.Q2(Q2[k])

            reward = -math.hypot(T1[k] - Tsp1[k], T2[k] - Tsp2[k])
            total_return += reward

            err1 = Tsp1[k] - T1[k]
            err2 = Tsp2[k] - T2[k]
            e1 += abs(err1);  e2 += abs(err2)
            # Negative error: measurement above set-point (overshoot);
            # positive error: measurement below set-point (undershoot).
            over  += max(0, -err1) + max(0, -err2)
            under += max(0,  err1) + max(0,  err2)
    finally:
        # Switch the heaters off even if the policy raised mid-rollout.
        env.Q1(0); env.Q2(0)

    # Save CSV
    csv_path = run_dir / "rollout.csv"
    with open(csv_path, "w", newline="") as f:
        w = csv.writer(f)
        w.writerow(["time","T1","T2","Q1","Q2","TSP1","TSP2"])
        w.writerows(zip(t, T1, T2, Q1, Q2, Tsp1, Tsp2))

    # Save plot
    fig, ax = plt.subplots(2,1,figsize=(10,8))
    ax[0].plot(t, T1, label="T1"); ax[0].plot(t, Tsp1, "--", label="TSP1")
    ax[0].plot(t, T2, label="T2"); ax[0].plot(t, Tsp2, ":", label="TSP2")
    ax[0].set_ylabel("Temp (°C)"); ax[0].legend(); ax[0].grid()

    ax[1].plot(t, Q1, label="Q1"); ax[1].plot(t, Q2, label="Q2")
    ax[1].set_ylabel("Heater (%)"); ax[1].set_xlabel("Time (s)")
    ax[1].legend(); ax[1].grid()
    fig.tight_layout()
    fig.savefig(run_dir / "rollout.png")
    plt.show()
    # Release the figure so repeated evaluations do not accumulate open figures.
    plt.close(fig)

    metrics = dict(return_sum=total_return, E1=e1, E2=e2, Over=over, Under=under)
    return metrics

# (4) Build the policy network and load its weights ─────────────────────
policy_net = DeterministicPolicy(obs_dim=4, act_dim=2)
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
ckpt = torch.load(MODEL_PATH, map_location="cpu")

# Keep only the entries under the 'policy.' prefix and strip that prefix.
# Slicing removes just the leading prefix; str.replace would also delete any
# later 'policy.' occurrence inside a nested parameter name.
policy_prefix = "policy."
subdict = {
    k[len(policy_prefix):]: v
    for k, v in ckpt.items()
    if k.startswith(policy_prefix)
}
policy_net.load_state_dict(subdict)

# (5) Run the simulator evaluation ──────────────────────────────────────
metrics = sim_evaluate_policy(
    policy=policy_net,
    max_steps=1200,
    log_root=EVAL_LOG_ROOT,
    seed=1,
    ambient=29.0,
    deterministic=True
)

print("Evaluation metrics:", metrics)



ImportError: attempted relative import with no known parent package