In [None]:
import gymnasium as gym

# Create a throwaway environment just to inspect its observation space
env = gym.make('Pong-v4')
try:
    print("_____OBSERVATION SPACE_____ \n")
    print("Observation Space Shape", env.observation_space.shape)

    # Reset the environment. The call sits inside the try block rather than as the
    # cell's final expression, so the returned observation array is not dumped
    # into the cell output.
    env.reset()
finally:
    # Release the environment: the name `env` is rebound to a vectorized
    # environment later in the notebook, so this instance would otherwise
    # never be closed.
    env.close()

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

# Configuration for the training run.
# A fixed seed is passed to both the environments and the algorithm so that runs
# are repeatable as far as the underlying libraries allow.
SEED = 0
N_ENVS = 4
TOTAL_TIMESTEPS = 100_000_000  # same value as int(1e8)

# Vectorized training environment: N_ENVS copies of Pong stepped together
env = make_vec_env('Pong-v4', n_envs=N_ENVS, seed=SEED)

# Initialize the model
# With n_steps=256 and 4 environments each rollout holds 256 * 4 = 1024
# transitions, which PPO consumes in minibatches of batch_size=128 for
# n_epochs=4 passes per update.
model = PPO(
    policy="CnnPolicy",
    env=env,
    batch_size=128,
    clip_range=0.1,
    ent_coef=0.01,
    gae_lambda=0.9,
    gamma=0.99,
    learning_rate=0.0001,
    max_grad_norm=0.5,
    n_epochs=4,
    n_steps=256,
    vf_coef=0.5,
    seed=SEED,
    verbose=1,
)

# Train the model
model.learn(total_timesteps=TOTAL_TIMESTEPS)

In [None]:
# Persist the trained agent to disk under a fixed checkpoint name
checkpoint_name = "ppo-pong-v4"
model.save(checkpoint_name)

In [None]:
# Score the trained policy over 25 episodes on `env`; evaluate_policy hands back
# the mean and the standard deviation of the per-episode rewards.
mean_reward, std_reward = evaluate_policy(
    model,
    env,
    n_eval_episodes=25,
)

# Report both statistics, one per line
for label, value in (('Mean Reward :', mean_reward), ('Deviation :', std_reward)):
    print(label, value)

In [None]:
import gymnasium as gym
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv

env_id = "Pong-v4"
video_folder = "./videos/"
video_length = 1500

# Single-environment vectorized wrapper; render_mode="rgb_array" lets frames be
# rendered as arrays for the recorder.
vec_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])

# Record the video starting at the first step
# NOTE(review): the "random-agent" prefix is kept so the output file name does not
# change, but the actions below come from the trained `model`, not a random policy.
vec_env = VecVideoRecorder(vec_env, video_folder,
                           record_video_trigger=lambda step: step == 0,
                           video_length=video_length,
                           name_prefix=f"random-agent-{env_id}-1")

try:
    # Take the initial observation from the reset of the *recorded* environment.
    # Previously `obs` came from a reset issued before wrapping, and the result of
    # the second reset was discarded, so the first prediction used a stale frame.
    obs = vec_env.reset()

    # video_length + 1 steps, as before; presumably the extra step lets the
    # recorder reach video_length and finalize the clip -- TODO confirm.
    for _step in range(video_length + 1):
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = vec_env.step(action)
finally:
    # Save the video (and release the environment) even if the rollout fails
    vec_env.close()