In [None]:
# Importing necessary Libraries
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
# Training configuration
SEED = 0                       # fixed seed so a Restart & Run All is repeatable
N_ENVS = 1                     # number of parallel environment copies
N_STEPS = 128                  # steps collected per environment before each update
# One rollout holds N_STEPS * N_ENVS transitions. The mini-batch size must not
# exceed that: the previous value of 256 was larger than the 128-sample rollout,
# which makes SB3 warn about truncated mini-batches. Deriving it from the rollout
# size keeps a single full mini-batch per epoch.
BATCH_SIZE = N_STEPS * N_ENVS
TOTAL_TIMESTEPS = 10_000_000

# Creating the environment
env = make_vec_env('ALE/Galaxian-v5', n_envs=N_ENVS, seed=SEED)

# Creating a model
model = PPO(
            "CnnPolicy",
            env,
            n_steps=N_STEPS,
            batch_size=BATCH_SIZE,
            gae_lambda=0.95,
            gamma=0.99,
            n_epochs=4,
            ent_coef=0.01,
            learning_rate=2.5e-4,
            vf_coef=0.5,
            max_grad_norm=0.5,
            clip_range=0.2,
            seed=SEED,
            verbose=1
        )

# Long-running cell: trains for TOTAL_TIMESTEPS environment steps.
model.learn(total_timesteps=TOTAL_TIMESTEPS)

In [None]:
# Persist the trained agent to disk under a named checkpoint path
model_path = "PPO-Galaxian-v5"
model.save(model_path)

In [None]:
# Score the trained agent: run a fixed number of evaluation episodes on `env`
# and report the mean and standard deviation of the episode rewards.
n_eval_episodes = 25
eval_result = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes)
mean_reward, std_reward = eval_result
print(f'Reward: {mean_reward} +/- {std_reward}')

In [None]:
# Recording a video of the trained agent
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv

env_id = 'ALE/Galaxian-v5'
video_folder = './videos'
video_length = 1950

# A separate environment is built with render_mode='rgb_array' for recording,
# then wrapped in the recorder before any reset happens.
vec_env = DummyVecEnv([lambda: gym.make(env_id, render_mode='rgb_array')])
vec_env = VecVideoRecorder(vec_env, video_folder=video_folder, record_video_trigger=lambda x: x%1000 == 0,
                           video_length=video_length, name_prefix=f'Galaxian-v5-{video_length}')

try:
    # Reset exactly once, on the wrapped env, and keep that observation.
    # Previously `obs` came from a reset of the unwrapped env and the wrapped
    # env was reset again with its observation discarded, so the first
    # prediction was made from an observation of an earlier reset.
    obs = vec_env.reset()

    for step_idx in range(video_length+1):
        action, _states = model.predict(obs)
        obs, rewards, done, info = vec_env.step(action)
finally:
    # Always close the recorder, even if the rollout is interrupted or fails.
    vec_env.close()