## Install Libraries

In [None]:
%%capture
!pip install pybullet
!pip install stable-baselines3[extra]
!pip install huggingface_sb3
!pip install huggingface_hub

## Imports

In [None]:
import gym
import pybullet_envs

from stable_baselines3 import A2C
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.env_util import make_vec_env

## Create and Inspect Environment

In [None]:
ENV_ID = "HalfCheetahBulletEnv-v0"

# Build a throwaway, un-vectorized instance purely to look at its spaces.
# It has its own name so it is not confused with the vectorized training `env`
# created in the next cell, and it is closed once inspected so it is not leaked.
probe_env = gym.make(ENV_ID)

# Shapes of the state (observation) space and of the action space
s_size = probe_env.observation_space.shape
a_size = probe_env.action_space.shape

print("_____OBSERVATION SPACE_____ \n")
print("The State Space is: ", s_size)
print("Sample observation", probe_env.observation_space.sample()) # Get a random observation

print("\n _____ACTION SPACE_____ \n")
print("The Action Space is: ", a_size)
print("Action Space Sample", probe_env.action_space.sample()) # Take a random action

# Done inspecting: release the probe environment
probe_env.close()

In [None]:
# Four copies of the task, wrapped together as a single vectorized environment
vec_env = make_vec_env(env_id=ENV_ID, n_envs=4)

# TODO: add sb3_contrib.common.wrappers.TimeFeatureWrapper (not applied yet)

# Normalize observations only (norm_reward=False leaves rewards untouched);
# clip_obs=10.0 bounds the normalized observations.
env = VecNormalize(vec_env, clip_obs=10.0, norm_reward=False, norm_obs=True)

# Show one random draw from the wrapped environment's observation space
env.observation_space.sample()

## Create and Train the Model

In [None]:
# Algorithm hyperparameters for the A2C agent, gathered in one mapping
a2c_hyperparams = dict(
    learning_rate=0.00096,
    n_steps=8,
    gamma=0.99,
    gae_lambda=0.9,
    ent_coef=0.0,
    vf_coef=0.4,
    max_grad_norm=0.5,
    use_rms_prop=True,
    use_sde=True,
    normalize_advantage=False,
    policy_kwargs=dict(log_std_init=-2, ortho_init=False),
)

# Build the agent on the normalized vectorized env; the fixed seed keeps runs repeatable
# and TensorBoard logs are written under ./tensorboard
model = A2C(
    policy="MlpPolicy",
    env=env,
    tensorboard_log="./tensorboard",
    verbose=1,
    seed=42,
    device="auto",
    **a2c_hyperparams,
)

In [None]:
# Training budget, counted in environment timesteps
TOTAL_TIMESTEPS = 2_000_000

model.learn(total_timesteps=TOTAL_TIMESTEPS)

## Save the Model

In [None]:
# Persist the trained agent; the file name is derived from the environment id
model_path = f"a2c-{ENV_ID}"
model.save(model_path)

# Persist the VecNormalize statistics too, so the same normalization can be
# restored when the agent is evaluated
stats_path = "vec_normalize.pkl"
env.save(stats_path)

## Evaluate the Agent

In [None]:
def make_eval_env():
    """Build one fresh instance of the task for evaluation."""
    return gym.make(ENV_ID)


# Single-env vectorized wrapper with the saved normalization statistics restored on top
eval_env = VecNormalize.load("vec_normalize.pkl", DummyVecEnv([make_eval_env]))

# Freeze the statistics: evaluation must not update the running averages
eval_env.training = False

# Rewards are reported unnormalized during evaluation
eval_env.norm_reward = False

# Reload the agent from the file written in the save step
model = A2C.load(f"a2c-{ENV_ID}")

# Run the evaluation episodes and report the mean and standard deviation of the reward
mean_reward, std_reward = evaluate_policy(model, eval_env)
print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")

## Publish the trained model on Hugging Face Hub

In [None]:
# Authenticate this notebook session with the Hugging Face Hub before publishing.
# NOTE(review): presumably prompts for a Hub access token with write permission — confirm.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
from huggingface_sb3 import package_to_hub, load_from_hub

# The saved-model name and the target Hub repository are both derived from the env id
model_name = f"a2c-{ENV_ID}"
repo_id = f"danieladejumo/{model_name}"

# Package the trained agent together with the evaluation env and push it to the Hub
package_to_hub(
    model=model,
    model_name=model_name,
    model_architecture="A2C",
    env_id=ENV_ID,
    eval_env=eval_env,
    repo_id=repo_id,
    commit_message="Initial Commit",
)

## Load from Hub

In [None]:
# Pull both the agent and its normalization statistics from the Hub repository,
# so this cell does not depend on files left on disk by the training cells.
repo_id = f"danieladejumo/a2c-{ENV_ID}"

# Download and load the trained agent
checkpoint = load_from_hub(repo_id=repo_id, filename=f"a2c-{ENV_ID}.zip")
model = A2C.load(checkpoint)

# Download the VecNormalize statistics stored next to the model in the repository
# (package_to_hub uploads them as vec_normalize.pkl when eval_env is a VecNormalize).
# NOTE: VecNormalize.load unpickles this file — only load from repositories you trust.
stats_path = load_from_hub(repo_id=repo_id, filename="vec_normalize.pkl")

# Rebuild the evaluation env with the downloaded statistics
eval_env = DummyVecEnv([lambda: gym.make(ENV_ID)])
eval_env = VecNormalize.load(stats_path, eval_env)

# Freeze the statistics and report raw (unnormalized) rewards during evaluation
eval_env.training = False
eval_env.norm_reward = False

# Evaluate the agent
mean_reward, std_reward = evaluate_policy(model, eval_env)
print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")