In [1]:
# https://github.com/huggingface/deep-rl-class/blob/main/unit1/unit1.ipynb

In [2]:
from datetime import datetime as dt
# Virtual display
from pyvirtualdisplay import Display
import gym
from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.
from stable_baselines3 import A2C, DDPG, DQN, HER, PPO, SAC, TD3
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.env_util import make_vec_env
import stable_baselines3
import numpy as np

2022-06-04 10:34:44.278320: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [3]:
# Start an invisible (visible=0) 1400x900 virtual display.
# NOTE(review): presumably needed so frames can be rendered for the replay video on a machine without a screen — confirm.
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7f16144d1340>

In [4]:
# Create the training environment; this single instance is handed to every model constructed below.
env = gym.make('BipedalWalker-v3')
# Last expression of the cell: display the device stable-baselines3 reports.
stable_baselines3.common.utils.get_device()

device(type='cuda')

In [5]:
def get_seed():
    """Return a fresh, non-reproducible seed in the range [0, 2**32).

    The value comes from a new ``np.random.SeedSequence`` (which pulls entropy
    from the OS when none is supplied), so, unlike calling ``np.random.seed()``
    followed by ``np.random.randint``, the global NumPy random state is left
    untouched and the result does not depend on the platform's default
    integer width.

    Returns:
        int: a plain Python integer ``s`` with ``0 <= s < 2**32``.
    """
    # generate_state(1) yields exactly one uint32 word, which bounds the seed below 2**32.
    return int(np.random.SeedSequence().generate_state(1)[0])

In [6]:
def train(model, seed, name_postfix, total_timesteps=500000,
          env_id="BipedalWalker-v3", n_eval_episodes=10):
    """Train ``model``, save it to disk, then evaluate it and print the score.

    Args:
        model: object exposing ``learn(total_timesteps=..., tb_log_name=...)``
            and ``save(path)``; it is also passed to ``evaluate_policy``.
        seed: accepted for call-site compatibility; this function does not
            read it (the callers pass the seed to the model constructor).
        name_postfix: suffix used for both the TensorBoard run name
            (``"first_run_" + name_postfix``) and the saved file name
            (``env_id + "_" + name_postfix``).
        total_timesteps: number of training timesteps passed to ``learn``.
        env_id: id of the gym environment created for evaluation; also the
            prefix of the saved model name.
        n_eval_episodes: number of deterministic evaluation episodes.

    Returns:
        None. Start/end timestamps and the evaluation result are printed.
    """
    # Timestamps before and after learning give a rough wall-clock duration.
    print(dt.now())
    model.learn(total_timesteps=total_timesteps, tb_log_name="first_run_" + name_postfix)
    print(dt.now())

    # Save the model
    model_name = env_id + "_" + name_postfix
    model.save(model_name)

    # Evaluate on a fresh environment and always release it afterwards,
    # even if evaluation raises, so repeated calls do not leak environments.
    eval_env = gym.make(env_id)
    try:
        mean_reward, std_reward = evaluate_policy(
            model, eval_env, n_eval_episodes=n_eval_episodes, deterministic=True
        )
    finally:
        eval_env.close()
    print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

In [7]:
# Directory handed to every model below as its `tensorboard_log` argument.
log_dir = "./logs/"

In [8]:
# Draw one seed and reuse it for every model constructed below.
seed = get_seed()

In [9]:
# A2C with an MLP policy on the shared env and seed; TensorBoard logs go to log_dir.
model_A2C_Mlp = A2C(
    policy="MlpPolicy",
    env=env,
    tensorboard_log=log_dir,
    verbose=0,
    seed=seed,
    device="cuda",
)
train(model_A2C_Mlp, seed, "A2C-Mlp")

2022-06-04 10:34:49.048746
2022-06-04 10:59:38.588713




mean_reward=-152.90 +/- 0.047019894115709185


In [11]:
# DDPG with an MLP policy on the shared env and seed; TensorBoard logs go to log_dir.
model_DDPG_MLP = DDPG(
    policy="MlpPolicy",
    env=env,
    tensorboard_log=log_dir,
    verbose=0,
    seed=seed,
    device="cuda",
)
train(model_DDPG_MLP, seed, "DDPG-Mlp")

2022-06-04 11:06:38.820088
2022-06-04 12:00:10.783256
mean_reward=267.19 +/- 2.3376836898101625


```
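# X # Not runnable here: DQN only supports discrete action spaces, and BipedalWalker-v3 has a continuous one.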
model_DQN_Mlp = DQN(
    policy = 'MlpPolicy',
    env = env,    
    tensorboard_log=log_dir,    
    verbose=0,
    seed=seed,
    device='cuda'
)
train(model_DQN_Mlp, seed, 'DQN-Mlp')
```

```
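# X # Not runnable here: HER needs a goal-conditioned environment (dict observations with achieved/desired goals), which BipedalWalker-v3 is not.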
model_HER = HER(
    policy = 'MlpPolicy',
    env = env,    
    tensorboard_log=log_dir,    
    verbose=0,
    seed=seed,
    device='cuda'
)
train(model_HER, seed, 'HER-Mlp')
```

In [13]:
# PPO with an MLP policy on the shared env and seed; TensorBoard logs go to log_dir.
model_PPO_MLP = PPO(
    policy="MlpPolicy",
    env=env,
    tensorboard_log=log_dir,
    verbose=0,
    seed=seed,
    device="cuda",
)
train(model_PPO_MLP, seed, "PPO-Mlp")

2022-06-04 12:23:41.029752
2022-06-04 12:38:06.229256
mean_reward=227.96 +/- 122.353694893439


In [14]:
# SAC with an MLP policy on the shared env and seed; TensorBoard logs go to log_dir.
model_SAC = SAC(
    policy="MlpPolicy",
    env=env,
    tensorboard_log=log_dir,
    verbose=0,
    seed=seed,
    device="cuda",
)
train(model_SAC, seed, "SAC-Mlp")

2022-06-04 12:39:08.329262
2022-06-04 14:02:52.118978
mean_reward=306.34 +/- 1.04225526781266


In [16]:
#@title
# Re-evaluate the trained SAC model on a newly created environment
# (10 deterministic episodes) and print mean reward +/- standard deviation.
eval_env = gym.make("BipedalWalker-v3")
mean_reward, std_reward = evaluate_policy(model_SAC, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=305.35 +/- 0.5660530984092027


In [17]:
# TD3 with an MLP policy on the shared env and seed; TensorBoard logs go to log_dir.
model_TD3 = TD3(
    policy="MlpPolicy",
    env=env,
    tensorboard_log=log_dir,
    verbose=0,
    seed=seed,
    device="cuda",
)
train(model_TD3, seed, "TD3-Mlp")

2022-06-04 14:05:25.801656
2022-06-04 14:53:36.536795
mean_reward=-76.03 +/- 4.4398826705535


### Step 8: Publish our trained model on the Hub

In [18]:
# Log in to the Hugging Face account so the trained model can be uploaded to the Hub.
notebook_login()

Login successful
Your token has been saved to /home/alex/.huggingface/token


In [19]:
# Set git's credential helper to `store` in the user's global config (applies to every repository, not just this one).
# NOTE(review): the `store` helper is understood to keep credentials in a plain file on disk — confirm that is acceptable on this machine.
!git config --global credential.helper store

In [20]:
import gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

In [21]:
# Settings used when publishing the trained agent to the Hugging Face Hub.
# Name of the environment
env_id = "BipedalWalker-v3"

# Model architecture being published (the SAC model trained above)
model_architecture = "SAC"

## repo_id is the id of the model repository on the Hugging Face Hub,
## in the form {organization}/{repo_name} (for instance ThomasSimonini/ppo-LunarLander-v2).
## CHANGE WITH YOUR REPO ID
repo_id = "format37/BipedalWalker-v3"

## Commit message used for the upload
commit_message = "Upload SAC BipedalWalker-v3 trained agent"

# Evaluation env wrapped in a one-environment DummyVecEnv.
# Note: this rebinds `eval_env`, which an earlier cell bound to a plain gym environment.
eval_env = DummyVecEnv([lambda: gym.make(env_id)])

In [23]:
# Name passed to package_to_hub as `model_name`.
# Note: this differs from the "BipedalWalker-v3_SAC-Mlp" name that train() used when saving locally.
model_name = 'SAC-Mlp'

In [24]:
# Package the trained SAC agent and push it to the Hugging Face Hub.
package_to_hub(model=model_SAC, # Our trained model
               model_name=model_name, # The name of our trained model
               model_architecture=model_architecture, # The model architecture we used: in our case SAC
               env_id=env_id, # Name of the environment
               eval_env=eval_env, # Evaluation Environment
               repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name}, for instance ThomasSimonini/ppo-LunarLander-v2)
               commit_message=commit_message)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: If you encounter a bug, please open an issue and use
push_to_hub instead.[0m


Cloning https://huggingface.co/format37/BipedalWalker-v3 into local empty directory.


Saving video to /home/alex/rig1/projects/pc/deep-rl-class/unit1/-step-0-to-step-1000.mp4


ffmpeg version 4.4.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 9.4.0 (GCC)
  configuration: --prefix=/home/alex/anaconda3/envs/pycuda --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1636205340875/_build_env/bin/x86_64-conda-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-demuxer=dash --enable-gnutls --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-vaapi --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/home/conda/feedstock_root/build_artifacts/ffmpeg_1636205340875/_build_env/bin/pkg-config
  libavutil      56. 70.100 / 56. 70.100
  libavcodec     58.134.100 / 58.134.100
  libavformat    58. 76.100 / 58. 76.100
  libavdevice    58. 13.100 / 58. 13.100
  libavfilter     7.110.100 /  7.110.100
  libavresample   4.  0

[38;5;4mℹ Pushing repo BipedalWalker-v3 to the Hugging Face Hub[0m


Upload file SAC-Mlp.zip:   1%|          | 32.0k/3.13M [00:00<?, ?B/s]

Upload file SAC-Mlp/pytorch_variables.pth: 100%|##########| 747/747 [00:00<?, ?B/s]

Upload file replay.mp4:   8%|8         | 32.0k/397k [00:00<?, ?B/s]

Upload file SAC-Mlp/actor.optimizer.pth:   5%|5         | 32.0k/584k [00:00<?, ?B/s]

Upload file SAC-Mlp/ent_coef_optimizer.pth: 100%|##########| 1.23k/1.23k [00:00<?, ?B/s]

Upload file SAC-Mlp/critic.optimizer.pth:   3%|2         | 32.0k/1.13M [00:00<?, ?B/s]

Upload file SAC-Mlp/policy.pth:   2%|2         | 32.0k/1.41M [00:00<?, ?B/s]

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/format37/BipedalWalker-v3
   747a4d6..964a458  main -> main



[38;5;4mℹ Your model is pushed to the hub. You can view your model here:
https://huggingface.co/format37/BipedalWalker-v3[0m


'https://huggingface.co/format37/BipedalWalker-v3'