In [16]:
import gymnasium as gym
import ale_py
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.atari_wrappers import AtariWrapper
from stable_baselines3 import PPO
import os

# Build the vectorized training environment: several copies of Alien, each
# wrapped with the Atari preprocessing wrapper.
# NOTE(review): the spec shown below reports frameskip=4 in the env kwargs and
# a MaxAndSkipEnv wrapper on top of it -- confirm the combined frame skip is
# what you want.
vec_env_options = dict(
    n_envs=4,
    wrapper_class=AtariWrapper,
    vec_env_cls=None,  # None -> default DummyVecEnv
)
env = make_vec_env("ALE/Alien-v5", **vec_env_options)

# Display the spec of the first sub-environment to inspect the wrapper chain.
env.envs[0].spec

EnvSpec(id='ALE/Alien-v5', entry_point='ale_py.env:AtariEnv', reward_threshold=None, nondeterministic=False, max_episode_steps=None, order_enforce=True, disable_env_checker=False, kwargs={'game': 'alien', 'repeat_action_probability': 0.25, 'full_action_space': False, 'frameskip': 4, 'max_num_frames_per_episode': 108000, 'render_mode': 'rgb_array'}, namespace='ALE', name='Alien', version=5, additional_wrappers=(WrapperSpec(name='Monitor', entry_point='stable_baselines3.common.monitor:Monitor', kwargs=None), WrapperSpec(name='NoopResetEnv', entry_point='stable_baselines3.common.atari_wrappers:NoopResetEnv', kwargs=None), WrapperSpec(name='MaxAndSkipEnv', entry_point='stable_baselines3.common.atari_wrappers:MaxAndSkipEnv', kwargs=None), WrapperSpec(name='EpisodicLifeEnv', entry_point='stable_baselines3.common.atari_wrappers:EpisodicLifeEnv', kwargs=None), WrapperSpec(name='FireResetEnv', entry_point='stable_baselines3.common.atari_wrappers:FireResetEnv', kwargs=None), WrapperSpec(name='Wa

In [27]:
# Train a PPO agent once, cache it on disk, and always work with the copy
# loaded from the checkpoint.
#
# Previously the training call was commented out, so every run of this cell
# saved a freshly initialised (untrained) network and silently overwrote any
# trained checkpoint at `model_path`. Training and saving now only happen when
# no checkpoint exists; delete the checkpoint file to force a retrain.
TOTAL_TIMESTEPS = 10_000  # training budget; raise for a stronger agent

tensorboard_path = r'./runs/ppo_alien'
model_path = r"./runs/ppo_alien/ppo_alien-v5"
# The checkpoint is expected at `<model_path>.zip` (SB3 adds the extension
# when the path has none) -- adjust if your SB3 version behaves differently.
checkpoint_file = model_path + ".zip"

if not os.path.exists(checkpoint_file):
    model = PPO(
        'CnnPolicy',
        env,
        verbose=1,
        tensorboard_log=os.path.join(tensorboard_path, 'tensorboard'),
        n_steps=128,
        batch_size=256,
        n_epochs=4,
        learning_rate=2e-4,
        gamma=0.99,
        clip_range=0.1,
        ent_coef=0.01,
    )
    model.learn(total_timesteps=TOTAL_TIMESTEPS)
    model.save(model_path)
    # Drop the in-memory instance so the code below exercises the saved file.
    del model

# Load the model from disk and attach the training environment to it.
model = PPO.load(model_path, env=env)

Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to ./runs/ppo_alien\tensorboard\PPO_2
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 601      |
|    ep_rew_mean     | 370      |
| time/              |          |
|    fps             | 318      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 512      |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 643         |
|    ep_rew_mean          | 356         |
| time/                   |             |
|    fps                  | 305         |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 1024        |
| train/                  |             |
|    approx_kl            | 0.002726088 |
|    clip_fraction        | 0.00488     |
|    clip_range           | 0.1         |
|    entr

In [29]:
# Shell escape: export this notebook (atari_ppo.ipynb) as a plain Python script with nbconvert.
!jupyter nbconvert --to script atari_ppo.ipynb

[NbConvertApp] Converting notebook atari_ppo.ipynb to script
[NbConvertApp] Writing 1367 bytes to atari_ppo.py


In [28]:
# Watch the saved agent play one complete game in a live window and report
# its raw score.
base_env = gym.make("ALE/Alien-v5", render_mode="human")
# Same preprocessing wrapper as training, with two evaluation-specific changes:
#   * terminal_on_life_loss=False -- play until the game is really over rather
#     than stopping the rollout at the first lost life;
#   * clip_reward=False -- accumulate the true game score, not clipped rewards.
# Observations are unaffected by these options.
eval_env = AtariWrapper(base_env, terminal_on_life_loss=False, clip_reward=False)
model = PPO.load("./runs/ppo_alien/ppo_alien-v5")

episode_return = 0.0
try:
    obs, _ = eval_env.reset()
    done, truncated = False, False

    # With render_mode="human" the frames are drawn as part of step(), so no
    # explicit render() call is needed inside the loop.
    while not (done or truncated):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = eval_env.step(action)
        episode_return += float(reward)
finally:
    # Always release the emulator and the render window, even when the loop
    # is interrupted or raises.
    eval_env.close()

# Total undiscounted score of the episode (displayed as the cell output).
episode_return
