In [2]:
"""
Implements unit 1 of the deep RL course
"""
import gym
from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub
from pyvirtualdisplay import Display
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv
from huggingface_hub import notebook_login


def env_info(env_name):
    """
    Print a short random rollout and the spaces of an environment.

    Creates the environment ``env_name``, takes 20 random actions in it
    (printing each action, and a notice whenever an episode ends and the
    environment is reset), then prints the shape and a sample of the
    observation space and the size and a sample of the action space.

    Args:
        env_name: Environment id passed to ``gym.make``.
    """
    env = gym.make(env_name)
    try:
        env.reset()
        for _step in range(20):
            # Take a random action
            action = env.action_space.sample()
            print("Action taken:", action)
            # Only the termination flag is needed here; the observation,
            # reward and info values are intentionally ignored.
            _obs, _reward, done, _info = env.step(action)

            if done:
                env.reset()
                print("Environment is reset")

        # The spaces are attributes of the environment itself, so the rollout
        # environment is reused instead of constructing a second one.
        print("___OBSERVATION SPACE___")
        print("Observation space shape:", env.observation_space.shape)
        print("Sample observation:", env.observation_space.sample())

        print("___ACTION SPACE___")
        # Reads `.n`, i.e. this assumes a discrete action space.
        print("Action space shape:", env.action_space.n)
        print("Action space sample:", env.action_space.sample())
    finally:
        # Always release the environment, even if a step or print fails.
        env.close()


def train(env_name, batch_size=16, num_steps=1000000, seed=None):
    """
    Train a PPO agent on ``env_name`` and save it to disk.

    Args:
        env_name: Environment id passed to ``make_vec_env``.
        batch_size: Number of environment copies in the vectorized
            environment (forwarded as ``n_envs``). This is *not* the PPO
            minibatch size, which is fixed at 64 below.
        num_steps: Total number of timesteps passed to ``model.learn``.
        seed: Optional seed forwarded to both the vectorized environment and
            PPO so a run can be repeated. ``None`` (the default) leaves both
            unseeded, which is the previous behaviour.

    Returns:
        The trained PPO model. It is also saved under ``ppo-<env_name>``.
    """

    # Vectorized environment: `batch_size` copies of the env are stepped
    # together while collecting rollouts.
    env = make_vec_env(env_name, n_envs=batch_size, seed=seed)
    model = PPO(
        policy="MlpPolicy",
        env=env,
        n_steps=1024,
        batch_size=64,
        n_epochs=4,
        gamma=0.999,
        gae_lambda=0.98,
        ent_coef=0.01,
        verbose=1,
        seed=seed,
    )
    model.learn(total_timesteps=num_steps)
    model_name = f"ppo-{env_name}"
    model.save(model_name)
    return model


def evaluate(env_name, model):
    """
    Evaluate the trained model and print its mean reward.

    Runs ``evaluate_policy`` for 10 deterministic episodes on a freshly
    created ``env_name`` environment and prints the mean reward together
    with its standard deviation.

    Args:
        env_name: Environment id passed to ``gym.make``.
        model: The trained model handed to ``evaluate_policy``.
    """
    env = gym.make(env_name)
    try:
        mean_reward, std_reward = evaluate_policy(
            model, env, n_eval_episodes=10, deterministic=True
        )
    finally:
        # Release the evaluation environment even if evaluation fails.
        env.close()
    print(f"Mean reward: {mean_reward:.2f} +/- {std_reward}")


def publish(model, env_name, model_name, repo_id="arkadyark/deep-rl-course-unit-1"):
    """
    Publish the model to the Hugging Face Hub.

    Requires that the user is already authenticated with the Hub (for
    example through the CLI login or ``notebook_login()``).

    Args:
        model: The trained model to upload.
        env_name: Environment id; used to build the evaluation environment,
            passed on as ``env_id`` and included in the commit message.
        model_name: Name forwarded to ``package_to_hub`` as ``model_name``.
        repo_id: Target Hub repository. Defaults to the repository this
            notebook was originally written for.
    """
    env_id = env_name
    # package_to_hub receives a vectorized evaluation env; the lambda defers
    # creation of the underlying env to DummyVecEnv.
    eval_env = DummyVecEnv([lambda: gym.make(env_id)])
    model_architecture = "PPO"
    # Derive the message from the environment actually being published
    # instead of hard-coding one specific environment name.
    commit_message = f"Push {env_id} model"

    package_to_hub(
        model,
        model_name=model_name,
        model_architecture=model_architecture,
        env_id=env_id,
        eval_env=eval_env,
        repo_id=repo_id,
        commit_message=commit_message,
    )


In [3]:
# Start a non-visible (visible=0) 1400x900 virtual display and keep a handle
# to it in `virtual_display`.
# NOTE(review): presumably needed so environment rendering works on a
# machine without a real screen — confirm.
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7f521c57cdc0>

In [4]:
# Notebook-wide configuration: the environment to train on and the model name
# later passed to publish(); then print a quick overview of the environment.
env_name = "LunarLander-v2"
model_name = f"arkadyark/{env_name}"
env_info(env_name)

Action taken: 3
Action taken: 3
Action taken: 2
Action taken: 3
Action taken: 1
Action taken: 2
Action taken: 2
Action taken: 2
Action taken: 3
Action taken: 1
Action taken: 2
Action taken: 2
Action taken: 1
Action taken: 3
Action taken: 1
Action taken: 0
Action taken: 0
Action taken: 3
Action taken: 2
Action taken: 2
___OBSERVATION SPACE___
Observation space shape: (8,)
Sample observation: [-0.04787325 -0.836816   -0.3753528  -0.04970386  0.41284412  0.2904583
  0.636347   -1.0349195 ]
___ACTION SPACE___
Action space shape: 4
Action space sample: 0


In [8]:
# Train with 64 parallel environment copies (the second positional argument
# of train() is forwarded as `n_envs`); `model` is reused by later cells.
model = train(env_name, 64)

Using cuda device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91.7     |
|    ep_rew_mean     | -198     |
| time/              |          |
|    fps             | 9032     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 65536    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 95.8         |
|    ep_rew_mean          | -185         |
| time/                   |              |
|    fps                  | 4801         |
|    iterations           | 2            |
|    time_elapsed         | 27           |
|    total_timesteps      | 131072       |
| train/                  |              |
|    approx_kl            | 0.0066265482 |
|    clip_fraction        | 0.0728       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | -0.000734   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 188         |
|    ep_rew_mean          | -5.25       |
| time/                   |             |
|    fps                  | 3194        |
|    iterations           | 11          |
|    time_elapsed         | 225         |
|    total_timesteps      | 720896      |
| train/                  |             |
|    approx_kl            | 0.006739664 |
|    clip_fraction        | 0.0565      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.14       |
|    explained_variance   | 0.000183    |
|    learning_rate        | 0.0003      |
|    loss                 | 335         |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.00321    |
|    value_loss           | 598         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300   

In [6]:
# Log in to the Hugging Face Hub from the notebook, then configure git to
# persist credentials with the `store` credential helper.
# NOTE(review): this cell has to run before the publish cell below.
notebook_login()
!git config --global credential.helper store

Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /home/ark/.cache/huggingface/token
Login successful


In [9]:
# Upload the trained model; relies on `model`, `env_name` and `model_name`
# from the cells above and on the Hub login having succeeded.
publish(model, env_name, model_name)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m


libGL error: MESA-LOADER: failed to open swrast: /usr/lib/dri/swrast_dri.so: cannot open shared object file: No such file or directory (search paths /usr/lib/x86_64-linux-gnu/dri:\$${ORIGIN}/dri:/usr/lib/dri, suffix _dri)
libGL error: failed to load driver: swrast


ContextException: Could not create GL context