# 가치 기반 강화 학습

## Stable Baselines

In [None]:
!pip install stable-baselines3[extra]

## DQN

In [1]:
#!hide
import imageio
import IPython
import tqdm

def render_episode(env, model, max_frame=1000, filename='cartpole.gif', fps=30):
    """Roll out one episode with ``model`` and return it as an animated GIF.

    Args:
        env: Environment whose ``reset()`` returns ``(state, info)``, whose
            ``step()`` returns a 5-tuple ending in ``terminated, truncated,
            info``, and whose ``render()`` returns one image frame (e.g. a
            Gymnasium env created with ``render_mode="rgb_array"``).
        model: Agent exposing ``predict(state, deterministic=True)`` that
            returns ``(action, state)``.
        max_frame: Upper bound on the number of environment steps to take.
        filename: Path the GIF is written to.
        fps: Frames per second passed to ``imageio.mimsave``.

    Returns:
        An ``IPython.display.Image`` that points at the written GIF.
    """
    state, _ = env.reset()
    # Capture the initial frame so the list is never empty, even when the
    # episode ends on the very first step.
    frames = [env.render()]
    for _ in tqdm.trange(max_frame):  # step until the frame budget runs out
        action, _states = model.predict(state, deterministic=True)
        state, _reward, terminated, truncated, _info = env.step(action)
        # Render before checking for the end of the episode so that the
        # terminal frame is part of the animation as well.
        frames.append(env.render())
        if terminated or truncated:
            break
    imageio.mimsave(filename, frames, fps=fps)  # write the frames as a GIF
    return IPython.display.Image(filename=filename)  # show the GIF inline

In [None]:
import gymnasium as gym
from stable_baselines3 import DQN

# Environment: CartPole-v1, created with render_mode="rgb_array"
# (presumably so that env.render() yields image frames for the GIF).
env = gym.make("CartPole-v1", render_mode="rgb_array")

# Model: a DQN agent using the "MlpPolicy" policy on this environment
model = DQN("MlpPolicy", env, verbose=1)

# Train for a total of 1000 timesteps, showing a progress bar
model.learn(total_timesteps=1000, log_interval=4, progress_bar=True)

In [None]:
# Animation: play one episode with the trained model and display it as a GIF
render_episode(env, model)

In [None]:
# Save the trained model under the name "dqn_cartpole"
model.save("dqn_cartpole")

# Delete the in-memory model object
del model

# Load the saved model back from "dqn_cartpole"
model = DQN.load("dqn_cartpole")

## 퀴즈

<iframe src="https://tally.so/embed/wMYXP8?alignLeft=1&hideTitle=1&transparentBackground=1&dynamicHeight=1" loading="lazy" width="100%" height="1800" frameborder="0" marginheight="0" marginwidth="0" title="[RL] 가치기반 강화학습"></iframe>