In [None]:
# Importing necessary Libraries
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy


# Build 8 copies of CarRacing-v2 as one vectorized environment for PPO to train on
env = make_vec_env('CarRacing-v2', n_envs=8)

# Initialize the model: PPO with a CNN policy (observations are images)
model = PPO(
    policy="CnnPolicy",
    env=env,
    batch_size=128,
    clip_range=0.1,
    ent_coef=0.01,
    gae_lambda=0.9,
    gamma=0.99,
    learning_rate=0.00005,
    max_grad_norm=0.5,
    n_epochs=4,
    n_steps=256,
    vf_coef=0.5,
    verbose=1,
)

# `total_timesteps` is an integer step budget; `1e7` is a float literal,
# so spell the same budget out as an int.
model.learn(total_timesteps=10_000_000)

In [None]:
# Evaluate the model: run 25 evaluation episodes on `env` and report
# the mean and the deviation of the episode rewards
mean_reward, std_reward = evaluate_policy(
    model,
    env,
    n_eval_episodes=25,
)
for label, value in (('Mean Reward :', mean_reward), ('Deviation :', std_reward)):
    print(label, value)

In [None]:
# Persist the trained model to disk under the name "ppo_carracing"
model.save(path="ppo_carracing")

<h1>PPO: evaluation results by learning rate</h1>


| Learning Rate | Mean Reward | Deviation |
|---------------|-------------|-----------|
| 0.00001       | -59.55538536| 25.310341824182267 |
| 0.00005       | 289.0281644 | 120.81766432392448 |
| 0.0001        | 81.42063832 | 76.9650508453757   |
| 0.0002        | 65.2997484  | 72.21204323808004  |
| 0.0005        | -14.38487644| 61.22441748132813  |

In [None]:
# Recording the video
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv

env_id = 'CarRacing-v2'
video_folder = './videos'
video_length = 4500

# Single environment rendered as rgb_array frames, wrapped in a recorder that
# starts recording at step 0 and captures `video_length` steps.
vec_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])
vec_env = VecVideoRecorder(vec_env, video_folder,
                           record_video_trigger=lambda step: step == 0,
                           video_length=video_length,
                           name_prefix=f"car-racing-{env_id}-1")

try:
    # Reset exactly once, through the recorder, and keep THAT observation.
    # Resetting the bare env first and then resetting again via the recorder
    # would leave `obs` pointing at a discarded episode, so the first
    # prediction would be made from a stale frame.
    obs = vec_env.reset()

    for step_idx in range(video_length + 1):
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = vec_env.step(action)
finally:
    # Always close the recorder/env, even if a step raises.
    vec_env.close()

In [None]:
# Restore the previously saved PPO model from "ppo_carracing.zip"
model = PPO.load(path="ppo_carracing.zip")

In [None]:
# Before uploading the model to the hub, log in to Hugging Face and store the
# credential for git as well (--add-to-git-credential).
# NOTE(review): `$your_hf_token` is a placeholder — presumably it must be defined
# (as a notebook variable or a shell environment variable) before this cell runs;
# confirm. Do not paste the raw token into the notebook.
!huggingface-cli login --token $your_hf_token --add-to-git-credential


# Uploading model to hugging face
import gymnasium as gym
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

# Name under which the trained model is packaged
model_name = "ppo-CarRacing-v2"

# Define a repo_id ({organization}/{repo_name})
repo_id = 'Amankankriya/ppo-CarRacing-v2'

# Define the name of the environment
env_id = 'CarRacing-v2'

# Create the evaluation env and set the render_mode="rgb_array".
# The env is wrapped in Monitor so that episode rewards/lengths are recorded
# by the env itself; SB3's evaluate_policy warns when the evaluation env is
# not Monitor-wrapped.
eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])


# Define the model architecture we used
model_architecture = "PPO"

# Define the commit message
commit_message = "trained model for CarRacing-v2 using PPO"

# package_to_hub saves and evaluates the model, generates a model card and
# records a replay video of the agent before pushing the repo to the hub.
package_to_hub(model=model,  # Our trained model
               model_name=model_name,  # The name of our trained model
               model_architecture=model_architecture,  # The model architecture we used: in our case PPO
               env_id=env_id,  # Name of the environment
               eval_env=eval_env,  # Evaluation environment
               repo_id=repo_id,  # id of the model repository on the Hugging Face Hub ({organization}/{repo_name}), e.g. Amankankriya/ppo-CarRacing-v2
               commit_message=commit_message)
