In [1]:
import gymnasium as gym

from stable_baselines3 import DQN

In [2]:
from stable_baselines3.common.callbacks import CheckpointCallback

# Save a checkpoint every 500 steps
freq = 1000

checkpoint_callback = CheckpointCallback(
  save_freq=freq,
  save_path="./logs/",
  name_prefix="lunar_",
  save_replay_buffer=False,
  save_vecnormalize=True,
)

In [3]:

env = gym.make("LunarLander-v2", render_mode="rgb_array")

model = DQN("MlpPolicy",
            env,
            verbose=1,
            exploration_final_eps=0.1,
            target_update_interval=250
        )

model.learn(total_timesteps=10000, callback=checkpoint_callback)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 85       |
|    ep_rew_mean      | -217     |
|    exploration_rate | 0.694    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 5394     |
|    time_elapsed     | 0        |
|    total_timesteps  | 340      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 85.8     |
|    ep_rew_mean      | -187     |
|    exploration_rate | 0.383    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 6681     |
|    time_elapsed     | 0        |
|    total_timesteps  | 686      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 88.8     |
|    ep_rew_mean      | -174   

<stable_baselines3.dqn.dqn.DQN at 0x152a4a0d0>

In [4]:
import os

models = [DQN.load("./logs/"+loc, env = env) for loc in os.listdir("./logs")]

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [5]:
rewards = []
i=0

for model in models:

    print("Model after " + str((i+1)*freq) + " timesteps")
    vec_env = model.get_env()
    obs = vec_env.reset()
    done = False
    total_reward = 0

    while not done:
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, done, info = vec_env.step(action)
        vec_env.render("human")
        total_reward += reward
    
    rewards.append(total_reward)
    print("    Reward: " + str(total_reward))
    i += 1


Model after 1000 timesteps
    Reward: [-158.40396]
Model after 2000 timesteps
    Reward: [-192.57431]
Model after 3000 timesteps
    Reward: [-272.60114]
Model after 4000 timesteps
    Reward: [-284.35788]
Model after 5000 timesteps
    Reward: [-254.82362]
Model after 6000 timesteps
    Reward: [-119.92459]
Model after 7000 timesteps
    Reward: [-335.34497]
Model after 8000 timesteps
    Reward: [-342.74115]
Model after 9000 timesteps
    Reward: [-135.93852]
Model after 10000 timesteps
    Reward: [-194.19492]
