In [None]:
class PathLogger(gym.Wrapper):
    """Record the cells visited during an episode and summarize it on termination.

    The wrapped environment must expose ``pos`` (current ``(y, x)`` position)
    and ``goal`` attributes. When a step returns ``terminated=True`` the
    returned ``info`` is a copy extended with the keys ``"episode"``,
    ``"success"``, ``"bonus"``, ``"steps"``, ``"return"`` and ``"path"``.
    Steps that end an episode through ``truncated`` only do NOT get a summary.

    Args:
        env: Environment to wrap.
        bonus_pos: Optional iterable of ``(y, x)`` bonus cells. Each entry is
            stored as a tuple in ``self.bonus_pos``.
    """

    def __init__(self, env, bonus_pos=None):
        super().__init__(env)
        self.path = []      # (y, x) tuples visited in the current episode
        self.ret = 0.0      # sum of rewards in the current episode
        self.episode = 0    # index of the current episode, starting at 0
        # Compare against None explicitly: ``bonus_pos or []`` raises
        # ValueError when bonus_pos is a multi-element NumPy array.
        positions = [] if bonus_pos is None else bonus_pos
        self.bonus_pos = [tuple(bp) for bp in positions]

    def reset(self, *, seed=None, options=None):
        """Reset the wrapped env and start a fresh path at its initial position.

        Returns:
            The ``(obs, info)`` pair produced by the wrapped environment.
        """
        # A non-empty path means a previous episode was recorded, so advance
        # the episode counter before starting the next one.
        if self.path:
            self.episode += 1
        obs, info = self.env.reset(seed=seed, options=options)
        y, x = self.env.pos
        self.path = [(int(y), int(x))]
        self.ret = 0.0
        return obs, info

    def step(self, action):
        """Step the wrapped env, extend the path, and summarize on termination.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` from the wrapped
            environment; ``info`` carries the episode summary when
            ``terminated`` is true.
        """
        obs, r, terminated, truncated, info = self.env.step(action)
        y, x = self.env.pos
        self.path.append((int(y), int(x)))
        self.ret += float(r)

        if terminated:
            # Goal/bonus checks are only needed for the summary, so they are
            # evaluated here instead of on every step (the bonus check scans
            # the whole path).
            reached_goal = np.array_equal(self.env.pos, self.env.goal)
            passed_bonus = any(bp in self.path for bp in self.bonus_pos)

            info = dict(info)  # copy so the wrapped env's dict is not mutated
            # NOTE(review): Stable-Baselines3's Monitor wrapper also stores an
            # "episode" entry in info -- confirm the wrapping order so the two
            # do not overwrite each other.
            info["episode"] = self.episode
            info["success"] = bool(reached_goal)
            info["bonus"] = bool(passed_bonus)
            info["steps"] = len(self.path) - 1  # path includes the start cell
            info["return"] = self.ret
            info["path"] = [list(p) for p in self.path]  # tuple -> list
        return obs, r, terminated, truncated, info

In [None]:
class GoalEpisodeLogger(BaseCallback):
    """Callback that logs episode summaries found in the step ``info`` dict.

    It reads the keys ``"success"``, ``"bonus"``, ``"steps"``, ``"return"``
    and ``"path"`` from the first environment's ``info`` (single-env
    assumption) and writes:

    * a ``[GOAL]`` entry when ``info["success"]`` is truthy,
    * a ``[BONUS]`` entry when ``info["bonus"]`` is truthy,
    * an ``[EPISODE]`` entry on every 10th finished episode.

    Args:
        ext_logger: Logger-like object with an ``info(msg)`` method; all text
            output of this callback goes through it.
    """

    # Keys that must be present in ``info`` before a summary can be formatted.
    _SUMMARY_KEYS = ("steps", "return", "path")

    def __init__(self, ext_logger):
        super().__init__(verbose=0)
        self.ext_logger = ext_logger
        self.ep = 0  # global count of finished episodes (single-env assumption)

    def _log_summary(self, tag, info):
        """Write the summary line and the path line for one episode under ``tag``."""
        self.ext_logger.info(f"[{tag}] episode={self.ep+1} steps={info['steps']} timesteps={self.num_timesteps} return={info['return']:.2f}")
        converted_path = list(map(list, info["path"]))
        # Use the injected logger rather than a module-level global so the
        # callback works wherever it is instantiated.
        self.ext_logger.info(f"[{tag}] path for webinput: {converted_path}")

    def _on_step(self) -> bool:
        """Inspect the latest step and log summaries; always continue training."""
        infos = self.locals.get("infos")
        dones = self.locals.get("dones")
        if not infos:
            return True

        # Single-env assumption: only the first environment is inspected.
        info = infos[0]
        done = bool(dones[0]) if dones is not None else False

        # An episode can finish without a summary (e.g. it was truncated and
        # the wrapper only summarizes terminated episodes); guard so that the
        # formatting below never raises KeyError.
        has_summary = bool(info) and all(key in info for key in self._SUMMARY_KEYS)

        if has_summary and info.get("success"):
            self._log_summary("GOAL", info)
            self.logger.record("goal/episode", self.ep+1)
            self.logger.record("goal/steps", info["steps"])
            self.logger.record("goal/return", info["return"])

        if has_summary and info.get("bonus"):
            self._log_summary("BONUS", info)

        if done:
            if has_summary and (self.ep+1) % 10 == 0:
                self._log_summary("EPISODE", info)
            # Count every finished episode, summarized or not.
            self.ep += 1

        return True