In [30]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.evaluation import evaluate_policy

import os

In [31]:
env = gym.make("CartPole-v1", render_mode="rgb_array")

In [32]:
save_freq = 2000

checkpoint_callback = CheckpointCallback(
  save_freq=save_freq,
  save_path="./logs/",
  name_prefix="rl_model",
  save_replay_buffer=False,
  save_vecnormalize=True,
)

In [24]:
for filename in os.listdir("./logs"):
    os.remove("./logs/" + filename)

model = DQN("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=save_freq * 10,
            callback=checkpoint_callback,
            progress_bar=True)

model.save("./logs/dqn_trained")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.5     |
|    ep_rew_mean      | 28.5     |
|    exploration_rate | 0.892    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 4932     |
|    time_elapsed     | 0        |
|    total_timesteps  | 114      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.9     |
|    ep_rew_mean      | 27.9     |
|    exploration_rate | 0.788    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 4842     |
|    time_elapsed     | 0        |
|    total_timesteps  | 223      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.9     |
|    ep_rew_mean      | 26.9     |
|    exploration_rate | 0.693    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 5128     |
|    time_elapsed     | 0        |
|    total_timesteps  | 323      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.6     |
|    ep_rew_mean      | 26.6     |
|    exploration_rate | 0.596    |
| time/               |          |
|    episodes       

In [27]:
import os

filenames = ["./logs/rl_model_" + str(i*save_freq) + "_steps.zip"
             for i in range(1, 11)]

In [28]:
for filename in filenames:
    print(filename)
    m = DQN.load(filename, env=env)
    env = m.get_env()
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _state = m.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        total_reward += reward
        env.render("human")
    print("    Total reward: " + str(total_reward))

./logs/rl_model_1000_steps.zip
    Total reward: [10.]
./logs/rl_model_2000_steps.zip
    Total reward: [10.]
./logs/rl_model_3000_steps.zip
    Total reward: [9.]
./logs/rl_model_4000_steps.zip
    Total reward: [9.]
./logs/rl_model_5000_steps.zip
    Total reward: [9.]
./logs/rl_model_6000_steps.zip
    Total reward: [10.]
./logs/rl_model_7000_steps.zip
    Total reward: [10.]
./logs/rl_model_8000_steps.zip
    Total reward: [9.]
./logs/rl_model_9000_steps.zip
    Total reward: [8.]
./logs/rl_model_10000_steps.zip
    Total reward: [8.]


In [29]:
model = DQN.load("./logs/dqn_trained.zip", env=env)
env = model.get_env()
obs = env.reset()
done = False
total_reward = 0
while not done:
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    total_reward += reward
    env.render("human")
print("    Total reward: " + str(total_reward))

    Total reward: [9.]
