<a href="https://colab.research.google.com/github/ccasanoval/RLtests/blob/master/LanderV0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lunar Lander v0 for HuggingFace

In [4]:
""" Install required libs """
# Environment setup. `%pip` (rather than `!pip`) installs into the environment
# of the running kernel, so the packages are importable by the cells below.
# NOTE: the checkpoint published on the Hub was saved with stable-baselines3
# 2.0.0a5 (see the "SAVED MODEL SYSTEM INFO" output of the loading cell);
# append `==2.0.0a5` to the first line to reproduce that setup.
%pip install stable-baselines3
# swig is installed before the box2d extra -- presumably required to build
# box2d's native bindings; TODO confirm.
%pip install swig
# Quoted so the shell cannot treat the square brackets as a glob pattern.
%pip install "gymnasium[box2d]"
%pip install huggingface_sb3

Collecting stable-baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl.metadata (5.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuff

In [2]:
""" CONFIG """

TRAIN = False   # True: train a new PPO agent; False: download the published checkpoint
LOAD = True     # not read by any cell visible in this notebook
SAVE = False    # True: run the upload-to-Hub cell
RENDER = False  # True: evaluate with render_mode="human"

# Only a few episodes when rendering, a larger sample otherwise.
EVAL_EPISODES = 3 if RENDER else 100

In [5]:
""" Create Agent Algo and Train it """
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Base name of the model archive. Defined outside the TRAIN branch because the
# upload cell passes `model_name` to package_to_hub even when the model was
# downloaded rather than trained; leaving it inside the branch raised a
# NameError for TRAIN = False, SAVE = True.
model_name = "ppo-LunarLander-v2"

if TRAIN:
    # 16 copies of the environment wrapped in a single vectorized env.
    env = make_vec_env("LunarLander-v2", n_envs=16)

    # Instantiate the agent with explicit PPO hyperparameters.
    model = PPO(
        policy="MlpPolicy",
        env=env,
        n_steps=1024,
        batch_size=64,
        n_epochs=4,
        gamma=0.999,
        gae_lambda=0.98,
        ent_coef=0.01,
        verbose=1,
    )

    # Train the agent, then write the archive to disk under `model_name`.
    model.learn(total_timesteps=int(1e6))
    model.save(model_name)

else:
    # Download the previously published checkpoint from the Hugging Face Hub.
    repo_id = "Cesoft/ppo-LunarLander-v2"
    repo_type = ""  # kept as-is; not passed to any call in this cell
    filename = f"{model_name}.zip"

    # Replacement values handed to PPO.load for the named attributes stored in
    # the checkpoint. Presumably this avoids deserialising schedule objects
    # pickled under a different library version (the saved model reports
    # Stable-Baselines3 2.0.0a5) -- TODO confirm.
    custom_objects = {
        "learning_rate": 0.0,
        "lr_schedule": lambda _: 0.0,
        "clip_range": lambda _: 0.0,
    }

    # Imported here so the package is only needed when loading from the Hub.
    from huggingface_sb3 import load_from_hub
    checkpoint = load_from_hub(repo_id, filename)
    model = PPO.load(checkpoint, custom_objects=custom_objects, print_system_info=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


ppo-LunarLander-v2.zip:   0%|          | 0.00/148k [00:00<?, ?B/s]

== CURRENT SYSTEM INFO ==
- OS: Linux-6.1.85+-x86_64-with-glibc2.35 # 1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
- Python: 3.10.12
- Stable-Baselines3: 2.3.2
- PyTorch: 2.3.1+cu121
- GPU Enabled: False
- Numpy: 1.26.4
- Cloudpickle: 2.2.1
- Gymnasium: 0.29.1
- OpenAI Gym: 0.25.2

== SAVED MODEL SYSTEM INFO ==
- OS: Linux-6.1.85+-x86_64-with-glibc2.35 # 1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
- Python: 3.10.12
- Stable-Baselines3: 2.0.0a5
- PyTorch: 2.3.1+cu121
- GPU Enabled: True
- Numpy: 1.26.4
- Cloudpickle: 2.2.1
- Gymnasium: 0.28.1
- OpenAI Gym: 0.25.2



In [6]:

""" Evaluate the Agent """
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy

# `gym`, `model`, RENDER and EVAL_EPISODES come from earlier cells.
# A render mode is only requested when rendering; otherwise gym.make is called
# with no extra keyword arguments, exactly as before.
make_kwargs = {"render_mode": "human"} if RENDER else {}
eval_env = Monitor(gym.make("LunarLander-v2", **make_kwargs))

try:
    mean_reward, std_reward = evaluate_policy(
        model,
        eval_env,
        n_eval_episodes=EVAL_EPISODES,
        deterministic=True,
        render=RENDER,
    )
finally:
    # Always release the environment, even when evaluation raises or the cell
    # is interrupted part-way through.
    eval_env.close()

# Reports the mean, the standard deviation, and mean minus standard deviation.
print(f"mean_reward={mean_reward:.2f} +/- {std_reward} = {mean_reward - std_reward}")



  and should_run_async(code)


mean_reward=244.38 +/- 32.5906232868481 = 211.7873233331519


In [7]:

""" Save model to HuggignFace """
if SAVE:
    # Interactive login to the Hugging Face Hub.
    # Access tokens are created at https://huggingface.co/settings/tokens
    from huggingface_hub import notebook_login
    notebook_login()
    #!git config --global credential.helper store

    import gymnasium as gym

    from stable_baselines3 import PPO
    # Imported explicitly so this cell does not rely on the evaluation cell
    # having been run first in the same kernel.
    from stable_baselines3.common.monitor import Monitor
    from stable_baselines3.common.vec_env import DummyVecEnv
    from stable_baselines3.common.env_util import make_vec_env

    from huggingface_sb3 import package_to_hub

    # `model` and `model_name` must already be defined by the earlier
    # training/loading cell.

    # Name of the environment the agent was trained on.
    env_id = "LunarLander-v2"

    # Algorithm name passed to package_to_hub.
    model_architecture = "PPO"

    # Id of the model repository on the Hugging Face Hub.
    repo_id = "Cesoft/ppo-LunarLander-v2"

    # Commit message for the upload.
    commit_message = "Upload PPO LunarLander-v2 trained agent"

    # Single-env vectorized evaluation environment created with
    # render_mode="rgb_array".
    eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])

    package_to_hub(
        model=model,  # The trained (or loaded) model
        model_name=model_name,  # The name of the model
        model_architecture=model_architecture,  # The algorithm used: in our case PPO
        env_id=env_id,  # Name of the environment
        eval_env=eval_env,  # Evaluation environment
        repo_id=repo_id,  # {organization}/{repo_name} on the Hugging Face Hub
        commit_message=commit_message,
    )