In [2]:
import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.evaluation import evaluate_policy

import os

2023-06-27 12:43:34.565363: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# One CartPole environment, shared by the training and replay cells below.
# It is created in "rgb_array" render mode; the replay cells later call
# render("human") on the wrapped env (presumably drawn from these frames --
# verify against the installed stable-baselines3 version).
ENV_ID = "CartPole-v1"
env = gym.make(ENV_ID, render_mode="rgb_array")

In [4]:
# Interval between checkpoints. A later cell reuses this value to rebuild the
# checkpoint file names, so it stays a notebook-level variable.
save_freq = 1000

# Options for the periodic checkpoint writer. Files land in ./logs/ and are
# named "<name_prefix>_<steps>_steps.zip".
checkpoint_options = {
    "save_freq": save_freq,
    "save_path": "./logs/",
    "name_prefix": "rl_model",
    "save_replay_buffer": False,
    # NOTE(review): no VecNormalize wrapper is created in this notebook, so
    # this flag presumably has nothing to save -- confirm before relying on it.
    "save_vecnormalize": True,
}
checkpoint_callback = CheckpointCallback(**checkpoint_options)

In [5]:
# Start from an empty log directory so stale checkpoints from an earlier run
# cannot be mistaken for this run's output.
log_dir = "./logs"
# A fresh checkout has no ./logs yet; without this, os.listdir raises
# FileNotFoundError before training ever starts.
os.makedirs(log_dir, exist_ok=True)
for filename in os.listdir(log_dir):
    stale_path = os.path.join(log_dir, filename)
    # os.remove cannot delete directories, so leave any sub-directory alone
    # instead of aborting the cell.
    if not os.path.isdir(stale_path):
        os.remove(stale_path)

# Training budget; the checkpoint callback saves intermediate models during
# this run.
total_steps = 10000

model = A2C("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=total_steps,
            callback=checkpoint_callback,
            progress_bar=True)

# Keep the final weights next to the intermediate checkpoints.
model.save("./logs/a2c_trained")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 35       |
|    ep_rew_mean        | 35       |
| time/                 |          |
|    fps                | 329      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.624   |
|    explained_variance | -0.0466  |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 1.44     |
|    value_loss         | 9.35     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 42.1     |
|    ep_rew_mean        | 42.1     |
| time/                 |          |
|    fps                | 379      |
|    iterations         | 200      |
|    time_elapsed       | 2        |
|    total_timesteps    | 1000     |
| train/                |          |
|

In [6]:
# Paths of the checkpoints written during training, oldest first.
# The count is derived from the training budget and the checkpoint interval
# rather than hard-coded, so it stays correct when either value is changed.
num_checkpoints = total_steps // save_freq

filenames = [f"./logs/rl_model_{i * save_freq}_steps.zip"
             for i in range(1, num_checkpoints + 1)]

In [7]:
import time

# Replay one rendered, deterministic episode per checkpoint and report its
# return, to show how the policy improves over training.
#
# The wrapped environment is kept under its own name: rebinding the notebook's
# `env` to the vectorized wrapper would silently change what every other cell
# sees depending on execution order.
eval_env = env
for filename in filenames:
    print(filename)
    checkpoint_model = A2C.load(filename, env=eval_env)
    # get_env() hands back the vectorized wrapper; reusing it on the next
    # iteration avoids re-wrapping the environment for every checkpoint.
    eval_env = checkpoint_model.get_env()
    obs = eval_env.reset()
    done = False
    total_reward = 0.0
    while not done:
        action, _state = checkpoint_model.predict(obs, deterministic=True)
        # The vectorized env returns one entry per sub-environment; only a
        # single environment is used here, so read index 0 to get scalars.
        obs, rewards, dones, infos = eval_env.step(action)
        total_reward += float(rewards[0])
        done = bool(dones[0])
        eval_env.render("human")
    print("    Total reward: " + str(total_reward))
    time.sleep(1)  # brief pause so consecutive replays are distinguishable

./logs/rl_model_1000_steps.zip
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
./logs/rl_model_1000_steps.zip
    Total reward: [91.]
./logs/rl_model_2000_steps.zip
./logs/rl_model_2000_steps.zip
    Total reward: [89.]
./logs/rl_model_3000_steps.zip
./logs/rl_model_3000_steps.zip
    Total reward: [126.]
./logs/rl_model_4000_steps.zip
./logs/rl_model_4000_steps.zip
    Total reward: [128.]
./logs/rl_model_5000_steps.zip
./logs/rl_model_5000_steps.zip
    Total reward: [223.]
./logs/rl_model_6000_steps.zip
./logs/rl_model_6000_steps.zip
    Total reward: [260.]
./logs/rl_model_7000_steps.zip
./logs/rl_model_7000_steps.zip
    Total reward: [235.]
./logs/rl_model_8000_steps.zip
./logs/rl_model_8000_steps.zip
    Total reward: [181.]
./logs/rl_model_9000_steps.zip
./logs/rl_model_9000_steps.zip
    Total reward: [366.]
./logs/rl_model_10000_steps.zip
./logs/rl_model_10000_steps.zip
    Total reward: [180.]


In [8]:
# Replay one rendered, deterministic episode with the final saved model and
# report its return.
model = A2C.load("./logs/a2c_trained.zip", env=env)
# Keep the vectorized wrapper under its own name so the notebook's `env` is
# not rebound to a different kind of object.
final_env = model.get_env()
obs = final_env.reset()
done = False
total_reward = 0.0
while not done:
    action, _state = model.predict(obs, deterministic=True)
    # The vectorized env returns one entry per sub-environment; only a single
    # environment is used here, so read index 0 to get scalars.
    obs, rewards, dones, infos = final_env.step(action)
    total_reward += float(rewards[0])
    done = bool(dones[0])
    final_env.render("human")
print("    Total reward: " + str(total_reward))

    Total reward: [164.]
