In [None]:
# Importing Necessary Libraries
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO, DDPG, A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

# Importing PyTorch
import torch
from torch import nn

In [None]:
# Creating the Environment
# A fixed seed is passed to both the environment factory and the model so a
# "Restart Kernel -> Run All" does not start from different random state each time.
SEED = 0
env = make_vec_env('HalfCheetah-v4', n_envs=1, seed=SEED)

# Custom network architecture: ReLU activations, with separate hidden-layer
# sizes for the actor (`pi`) and the critic (`qf`).
policy_kwargs = dict(
    activation_fn=torch.nn.ReLU,
    net_arch=dict(pi=[128, 64], qf=[128, 128]),
)

# DDPG learns a deterministic policy and SB3 adds no exploration noise unless
# `action_noise` is given, so Gaussian noise is added to the actions during
# data collection (same form as the example in the SB3 DDPG documentation).
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=0.1 * np.ones(n_actions),
)

# Model
model = DDPG(
    policy='MlpPolicy',
    env=env,
    learning_rate=0.00032,
    batch_size=100,
    tau=0.05,
    gamma=0.99,
    action_noise=action_noise,
    policy_kwargs=policy_kwargs,
    seed=SEED,
    verbose=1,
)

# Train the model
model.learn(total_timesteps=4000000)

In [None]:
# Continue training the existing model on a fresh environment with a new learning rate.

# Use set_env() rather than assigning `model.env` directly: it checks that the
# observation/action spaces and the number of envs match the model, and makes
# the next learn() call reset the new environment.
model.set_env(make_vec_env('HalfCheetah-v4', n_envs=1))

# The optimizers read their learning rate from `model.lr_schedule`, which was
# built from `learning_rate` when the model was constructed. Assigning
# `model.learning_rate` alone therefore has no effect; rebuild the schedule too.
model.learning_rate = 0.0005
model._setup_lr_schedule()

# reset_num_timesteps=False continues the existing timestep counter (and logs)
# instead of starting over, which is the intended mode for resumed training.
model.learn(total_timesteps=1000000, reset_num_timesteps=False)

In [None]:
# Continue training with a new learning rate and a smaller discount factor.

# The model (and its replay buffer) was created for a single environment, so the
# replacement env must also have n_envs=1. Assigning a 2-env VecEnv to
# `model.env` directly skips set_env()'s validation and leaves the buffer sized
# for one env, which cannot store the 2-env transitions.
# To really train on more envs, save the model and reload it with
# DDPG.load(path, env=...) instead.
model.set_env(make_vec_env('HalfCheetah-v4', n_envs=1))

# `model.lr_schedule` (what the optimizers actually use) was derived from
# `learning_rate` at construction time; rebuild it so the new value takes effect.
model.learning_rate = 0.0005
model._setup_lr_schedule()

# gamma is read directly from the model attribute, so plain assignment is enough.
model.gamma = 0.9

# Keep counting timesteps from the previous runs instead of restarting at zero.
model.learn(total_timesteps=1000000, reset_num_timesteps=False)

In [None]:
# Score the trained agent: run 100 evaluation episodes on a freshly created
# single-env HalfCheetah-v4 and report the mean and standard deviation of the
# episode rewards.
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=make_vec_env('HalfCheetah-v4', n_envs=1),
    n_eval_episodes=100,
)
print(f'Reward = {mean_reward} +/- {std_reward}')

model = DDPG(
    policy = 'MlpPolicy',
    env = env,
    learning_rate = 0.0001,
    batch_size = 64,
    tau = 0.005,
    gamma = 0.9, 
    verbose=1
)
Reward = 1361.3731310800001 +/- 70.26345065459948


model = DDPG(
    policy = 'MlpPolicy',
    env = env,
    learning_rate = 0.001,
    batch_size = 16,
    tau = 0.005,
    gamma = 0.9, 
    verbose=1
)
Reward = 893.96030944 +/- 62.91451376119569

model = DDPG(
    policy = 'MlpPolicy',
    env = env,
    learning_rate = 0.0001,
    batch_size = 16,
    tau = 0.005,
    gamma = 0.9, 
    verbose=1
)
Reward = 1354.26040748 +/- 35.594102669055225



In [None]:
# Before uploading the model to the hub, log in and verify your credentials.
# NOTE(review): `your_hf_token` is not defined in any of the visible cells, so it
# must be set before this cell runs. Do not commit the token value or leave it
# in saved cell output.
!huggingface-cli login --token $your_hf_token --add-to-git-credential


# Publish the trained DDPG agent to the Hugging Face Hub
import gymnasium as gym
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

# Environment the agent was trained on, and the algorithm used to train it
env_id = "HalfCheetah-v4"
model_architecture = "DDPG"

# Name of the trained model and the target repository on the Hub,
# written as {organization}/{repo_name}
model_name = "DDPG-HalfCheetah-v4"
repo_id = "Amankankriya/DDPG-HalfCheetah-v4"

# Message attached to the commit that pushes the model
commit_message = "trained model for HalfCheetah-v4 using DDPG"

# Evaluation environment: a single env wrapped in DummyVecEnv, created with
# render_mode="rgb_array"
eval_env = DummyVecEnv(
    [lambda: gym.make(env_id, render_mode="rgb_array")]
)

# package_to_hub saves and evaluates the model, generates a model card and
# records a replay video of the agent before pushing the repo to the Hub.
package_to_hub(
    model=model,
    model_name=model_name,
    model_architecture=model_architecture,
    env_id=env_id,
    eval_env=eval_env,
    repo_id=repo_id,
    commit_message=commit_message,
)
