<a href="https://colab.research.google.com/github/damilare-akin/deep-rl-class/blob/main/personal-notebooks/a2c_half_cheetah.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

install dependencies

In [5]:
!pip install pybullet
!pip install stable-baselines3[extra]
!pip install huggingface_sb3
!pip install huggingface_hub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


load dependencies

In [6]:
import gym
import pybullet_envs

import os

from huggingface_sb3 import load_from_hub, package_to_hub

from stable_baselines3 import A2C
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.env_util import make_vec_env

from huggingface_hub import notebook_login

import torch 
from torch import nn

In [7]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

## Create the half-cheetah environment

In [8]:
env_id = 'HalfCheetahBulletEnv-v0'

# create the env
env = gym.make(env_id)

# initialize the action and state sizes
# n_action = env.action_space.n
# n_state = env.observation_space.shape[0]

normalizing input features by wrapping the current env with `VecNormalize`

In [9]:
env = make_vec_env(env_id, n_envs=4)

# Adding this wrapper to normalize the observation and the reward
env = VecNormalize(env, norm_obs=True, norm_reward=False, clip_obs=10.)

In [10]:
model = A2C(policy = "MlpPolicy",
            env = env,
            gae_lambda = 0.9,
            gamma = 0.99,
            learning_rate = 0.00096,
            max_grad_norm = 0.5,
            n_steps = 8,
            vf_coef = 0.4,
            ent_coef = 0.0,
            tensorboard_log = "./tensorboard",
            policy_kwargs=dict(
            log_std_init=-2, ortho_init=False),
            normalize_advantage=False,
            use_rms_prop= True,
            use_sde= True,
            verbose=1)

Using cuda device


In [11]:
model.learn(1_500_000)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
| time/                 |          |
|    fps                | 657      |
|    iterations         | 19100    |
|    time_elapsed       | 929      |
|    total_timesteps    | 611200   |
| train/                |          |
|    entropy_loss       | -1.34    |
|    explained_variance | -3.06    |
|    learning_rate      | 0.00096  |
|    n_updates          | 19099    |
|    policy_loss        | 1.54     |
|    std                | 0.0636   |
|    value_loss         | 6.57     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1e+03    |
|    ep_rew_mean        | 1.18e+03 |
| time/                 |          |
|    fps                | 657      |
|    iterations         | 19200    |
|    time_elapsed       | 934      |
|    total_timesteps    | 614400   |
| train/                |          |
|    entropy_loss       | -1.59    |
|    expla

<stable_baselines3.a2c.a2c.A2C at 0x7fd0ec3b6cd0>

In [12]:
# Save the model and  VecNormalize statistics when saving the agent
model.save("HalfCheetahBulletEnv-v0")
env.save("vec_normalize.pkl")

In [14]:
# load the saved statistics
eval_env = DummyVecEnv([lambda: gym.make("HalfCheetahBulletEnv-v0")])
eval_env = VecNormalize.load("vec_normalize.pkl", eval_env)

#  do not update them at test time
eval_env.training = False
# reward normalization is not needed at test time
eval_env.norm_reward = False

# Load the agent
model = A2C.load("HalfCheetahBulletEnv-v0")

mean_reward, std_reward = evaluate_policy(model, env)

print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")

Mean reward = 1037.67 +/- 95.07


In [15]:
from huggingface_hub import notebook_login
notebook_login()

Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.huggingface/token
Login successful


In [16]:
package_to_hub(
    model=model,
    model_name=f"a2c-{env_id}",
    model_architecture="A2C",
    env_id=env_id,
    eval_env=eval_env,
    repo_id=f"damilare-akin/a2c-{env_id}",
    commit_message="Initial commit",
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m




Saving video to /tmp/tmpudwflm40/-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo damilare-akin/a2c-HalfCheetahBulletEnv-v0 to the Hugging
Face Hub[0m
[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/damilare-akin/a2c-HalfCheetahBulletEnv-v0/tree/main/[0m


'https://huggingface.co/damilare-akin/a2c-HalfCheetahBulletEnv-v0/tree/main/'