# Installations and Imports

In [None]:
!apt install python-opengl
!apt install ffmpeg
!apt install xvfb
!pip3 install pyvirtualdisplay

# Virtual display
from pyvirtualdisplay import Display

virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

In [2]:
# Python packages used by the rest of the notebook. Each line is a separate
# shell command; `--quiet` reduces pip's console output.
# NOTE(review): only ale-py is version-pinned; the other packages resolve to
# whatever version is current at install time.
!pip install gym[box2d] --quiet
!pip install stable-baselines3[extra] --quiet
!pip install huggingface_sb3 --quiet
!pip install pyglet --quiet
!pip install ale-py==0.7.4 --quiet # To overcome an issue with gym (https://github.com/DLR-RM/stable-baselines3/issues/875)

[K     |████████████████████████████████| 448 kB 5.1 MB/s 
[K     |████████████████████████████████| 177 kB 4.9 MB/s 
[K     |████████████████████████████████| 1.5 MB 64.6 MB/s 
[K     |████████████████████████████████| 1.6 MB 49.6 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
  Building wheel for gym (setup.py) ... [?25l[?25hdone
  Building wheel for AutoROM.accept-rom-license (PEP 517) ... [?25l[?25hdone
[K     |████████████████████████████████| 84 kB 2.8 MB/s 
[K     |████████████████████████████████| 596 kB 33.0 MB/s 
[K     |████████████████████████████████| 1.6 MB 5.3 MB/s 
[?25h

In [3]:
import gym

from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env

from huggingface_hub import notebook_login
from huggingface_sb3 import package_to_hub, load_from_hub, push_to_hub

# Create the Environment

In [4]:
# Build one instance of the environment and reset it before inspecting it.
env_id = "LunarLander-v2"
env = gym.make(env_id)
env.reset()

# Each report is a banner, the space's shape and class, and one random sample.
observation_report = (
  "__________ OBSERVATION SPACE __________ ",
  f"Observation Space Dim: {env.observation_space.shape}, Type {type(env.observation_space)}",
  f"Observation Space Example: {env.observation_space.sample()}\n\n",
)
action_report = (
  "__________ ACTION SPACE __________ ",
  f"Action Space Dim: {env.action_space.shape} Type: {type(env.action_space)}",
  f"Action Space Example: {env.action_space.sample()}",
)

# Print the observation report first, then the action report, one line per call.
for report_line in observation_report + action_report:
  print(report_line)

__________ OBSERVATION SPACE __________ 
Observation Space Dim: (8,), Type <class 'gym.spaces.box.Box'>
Observation Space Example: [-0.28322414 -0.8344068  -0.51862127 -1.1527661   0.7222115   0.1041571
 -1.419795   -1.5416954 ]


__________ ACTION SPACE __________ 
Action Space Dim: () Type: <class 'gym.spaces.discrete.Discrete'>
Action Space Example: 0


In [12]:
# Rebind `env` to a vectorized environment holding 16 instances of `env_id`.
env = make_vec_env(env_id=env_id, n_envs=16)

# Create and Train the Model

In [13]:
# PPO agent using the "MlpPolicy" policy on the vectorized environment `env`;
# verbose=1 prints the training progress tables.
model = PPO("MlpPolicy", env=env, verbose=1)

# `learn` takes its step budget as an integer. The literal 1e6 is a float,
# so convert it explicitly instead of passing a float through.
TOTAL_TIMESTEPS = int(1e6)
model.learn(total_timesteps=TOTAL_TIMESTEPS)

Using cuda device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 103      |
|    ep_rew_mean     | -181     |
| time/              |          |
|    fps             | 3995     |
|    iterations      | 1        |
|    time_elapsed    | 8        |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 98.7        |
|    ep_rew_mean          | -143        |
| time/                   |             |
|    fps                  | 1661        |
|    iterations           | 2           |
|    time_elapsed         | 39          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.009757464 |
|    clip_fraction        | 0.0901      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0.00209     |
|    learnin

<stable_baselines3.ppo.ppo.PPO at 0x7fd33f966b90>

# Evaluate Model

In [18]:
# Fresh, non-vectorized environment used only for evaluation.
eval_env = gym.make(env_id)

# Run 100 evaluation episodes with deterministic actions and unpack the
# returned pair into the mean and standard deviation of the reward.
evaluation_result = evaluate_policy(
  model=model,
  env=eval_env,
  n_eval_episodes=100,
  deterministic=True,
)
reward_mean, reward_std = evaluation_result
print(f"Mean Reward: {reward_mean} Reward Std: {reward_std}")



Mean Reward: 263.208052731326 Reward Std: 27.58380459251054


In [19]:
# Number of environment steps to render; episodes are restarted as they end.
N_RENDER_STEPS = 1000

obs = eval_env.reset()

try:
  for _ in range(N_RENDER_STEPS):
    # Deterministic action from the trained policy for the current observation.
    action, _states = model.predict(obs, deterministic=True)
    obs, rew, done, info = eval_env.step(action)
    eval_env.render()
    if done:
      # Episode finished: start a new one so the rollout can continue.
      obs = eval_env.reset()
finally:
  # Always close the environment so rendering resources are released,
  # even if the rollout is interrupted or raises.
  eval_env.close()

# Save the Model

In [20]:
# Save the trained model; `model_name` is the path handed to `save`.
model_name = "./ppo_lunar-lander-v2"
model.save(path=model_name)

# Kept for reference: how to load the saved model back.
# model2 = PPO.load(model_name, env=eval_env)

# Publish Model To Hub

In [10]:
# Log in to the Hugging Face Hub from inside the notebook; the recorded output
# shows the token being saved locally on success.
notebook_login()
# Set git's global credential helper to `store`.
!git config --global credential.helper store

Login successful
Your token has been saved to /root/.huggingface/token


In [17]:
from stable_baselines3.common.vec_env import DummyVecEnv


def _make_eval_env():
  """Build one fresh instance of the environment named by `env_id`."""
  return gym.make(env_id)


# Destination repository and metadata for this upload.
repo_id = "danieladejumo/ppo_lunar-lander-v2"
model_architecture = "PPO"
commit_message = "First Full Training"

# The evaluation environment is passed to the hub helper as a vectorized env
# built from a single factory.
eval_env = DummyVecEnv([_make_eval_env])

hub_kwargs = dict(
  model=model,
  model_name=model_name,
  model_architecture=model_architecture,
  env_id=env_id,
  eval_env=eval_env,
  repo_id=repo_id,
  commit_message=commit_message,
)

# Left as the final expression so the returned value is displayed by the cell.
package_to_hub(**hub_kwargs)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue and use
push_to_hub instead.[0m


/content/hub/ppo_lunar-lander-v2 is already a clone of https://huggingface.co/danieladejumo/ppo_lunar-lander-v2. Make sure you pull the latest changes with `repo.git_pull()`.


Saving video to /content/-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo ppo_lunar-lander-v2 to the Hugging Face Hub[0m


Upload file replay.mp4:   2%|1         | 3.34k/204k [00:00<?, ?B/s]

Upload file ppo_lunar-lander-v2/policy.optimizer.pth:   4%|4         | 3.34k/82.9k [00:00<?, ?B/s]

Upload file ppo_lunar-lander-v2.zip:   2%|2         | 3.34k/141k [00:00<?, ?B/s]

Upload file ppo_lunar-lander-v2/policy.pth:   8%|7         | 3.34k/42.2k [00:00<?, ?B/s]

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/danieladejumo/ppo_lunar-lander-v2
   c9556cc..dae90f4  main -> main



[38;5;4mℹ Your model is pushed to the hub. You can view your model here:
https://huggingface.co/danieladejumo/ppo_lunar-lander-v2[0m


'https://huggingface.co/danieladejumo/ppo_lunar-lander-v2'