In [4]:
import pybullet_envs
import panda_gym
import gym

import os

from huggingface_sb3 import load_from_hub, package_to_hub

from stable_baselines3 import A2C
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.env_util import make_vec_env

from huggingface_hub import notebook_login

In [5]:
env_id = "AntBulletEnv-v0"

env = gym.make(env_id)

s_size = env.observation_space.shape[0]
a_size = env.action_space

In [6]:
env = make_vec_env(env_id, n_envs=4)

env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)

In [7]:
model = A2C(
    policy="MlpPolicy",
    env=env,
    gae_lambda=0.9,
    gamma=0.99,
    learning_rate=0.00096,
    max_grad_norm=0.5,
    n_steps=8,
    vf_coef=0.4,
    ent_coef=0.0,
    policy_kwargs=dict(log_std_init=-2, ortho_init=False),
    normalize_advantage=False,
    use_rms_prop=True,
    use_sde=True,
    verbose=1,
)

Using cuda device


In [None]:
model.learn(2_000_000)

In [9]:
model.save("a2c-AntBulletEnv-v0")
env.save("vec_normalize.pkl")

In [10]:
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

eval_env = DummyVecEnv([lambda: gym.make("AntBulletEnv-v0")])
eval_env = VecNormalize.load("vec_normalize.pkl", eval_env)

eval_env.training = False
eval_env.norm_reward = False

model = A2C.load("a2c-AntBulletEnv-v0")

mean_reward, std_reward = evaluate_policy(model, env)

print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")

Mean reward = 1551.01 +/- 323.95


In [12]:
package_to_hub(
    model=model,
    model_name=f"a2c-{env_id}",
    model_architecture="A2C",
    env_id=env_id,
    eval_env=eval_env,
    repo_id=f"akghxhs55/a2c-{env_id}",
    commit_message="Initial commit",
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m
Saving video to /tmp/tmptg7trqa0/-step-0-to-step-1000.mp4


ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --e

[38;5;4mℹ Pushing repo akghxhs55/a2c-AntBulletEnv-v0 to the Hugging Face
Hub[0m



[A


[A[A[A

vec_normalize.pkl:   0%|          | 0.00/2.22k [00:00<?, ?B/s]



[A[A[A[A


[A[A[A
vec_normalize.pkl: 100%|██████████| 2.22k/2.22k [00:00<00:00, 3.77kB/s]

[A[A

vec_normalize.pkl: 100%|██████████| 2.22k/2.22k [00:01<00:00, 1.38kB/s]
policy.pth: 100%|██████████| 57.0k/57.0k [00:01<00:00, 28.5kB/s]
policy.optimizer.pth: 100%|██████████| 56.2k/56.2k [00:01<00:00, 28.1kB/s]
a2c-AntBulletEnv-v0.zip: 100%|██████████| 129k/129k [00:02<00:00, 57.6kB/s]




replay.mp4: 100%|██████████| 1.10M/1.10M [00:02<00:00, 396kB/s] 




pytorch_variables.pth: 100%|██████████| 431/431 [00:01<00:00, 262B/s]




Upload 6 LFS files: 100%|██████████| 6/6 [00:03<00:00,  1.84it/s]


[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/akghxhs55/a2c-AntBulletEnv-v0/tree/main/[0m


'https://huggingface.co/akghxhs55/a2c-AntBulletEnv-v0/tree/main/'