In [None]:
# https://github.com/huggingface/deep-rl-class/blob/main/unit1/unit1.ipynb

In [1]:
from datetime import datetime as dt
# Virtual display
from pyvirtualdisplay import Display
import gym
from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.
from stable_baselines3 import A2C, DDPG, DQN, HER, PPO, SAC, TD3
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.env_util import make_vec_env
import stable_baselines3
import numpy as np

2022-10-12 21:13:51.881109: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
#from stable_baselines.deepq import DQNsl

In [3]:
# Start a virtual (non-visible) display of 1400x900 pixels.
# NOTE(review): presumably required so environment rendering / video capture
# works on a machine without a screen -- confirm.
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7f2e443e8310>

In [4]:
# Create the training environment; this single instance is handed to every
# model constructed below.
env = gym.make('MountainCar-v0')
# Display the torch device returned by Stable-Baselines3's get_device() when
# called with its default argument (shown as the cell output).
stable_baselines3.common.utils.get_device()

device(type='cuda')

In [5]:
def get_seed():
    """Return a fresh random seed as a plain Python ``int`` in ``[0, 2**32)``.

    The value is drawn from a throwaway ``numpy.random.Generator`` initialised
    from OS entropy, so calling this function does not reseed or otherwise
    disturb the global ``np.random`` state.

    Returns:
        int: a non-negative integer strictly below ``2**32``.
    """
    # dtype=np.uint32 keeps the exclusive upper bound 2**32 valid even where
    # NumPy's default integer type is only 32 bits wide.
    rng = np.random.default_rng()
    return int(rng.integers(0, 2**32, dtype=np.uint32))

In [6]:
def train(model, seed, name_postfix, total_timesteps=5000000,
          env_id="MountainCar-v0", n_eval_episodes=10):
    """Train ``model``, save it to disk and evaluate it on a fresh environment.

    Args:
        model: Algorithm instance exposing ``learn(total_timesteps, tb_log_name)``
            and ``save(path)``.
        seed: Currently unused; accepted so existing call sites keep working.
        name_postfix: Tag appended to the TensorBoard run name
            (``"first_run_" + name_postfix``) and to the saved model file name.
        total_timesteps: Number of environment steps to train for.
        env_id: Gym id of the environment used for evaluation; also part of the
            saved model file name.
        n_eval_episodes: Number of episodes used by ``evaluate_policy``.

    Returns:
        tuple: ``(mean_reward, std_reward)`` as returned by ``evaluate_policy``.
    """
    # Print wall-clock timestamps before and after training to show its duration.
    print(dt.now())
    model.learn(total_timesteps=total_timesteps, tb_log_name="first_run_" + name_postfix)
    print(dt.now())

    # Save the model. The prefix is derived from the algorithm class so that,
    # for example, an A2C model is no longer written to a "ppo-..." file; for a
    # PPO model the file name is the same as before.
    algo_prefix = type(model).__name__.lower()
    model_name = f"{algo_prefix}-{env_id}_{name_postfix}"
    model.save(model_name)

    # Evaluate on a separate environment and always release it afterwards.
    eval_env = gym.make(env_id)
    try:
        mean_reward, std_reward = evaluate_policy(
            model, eval_env, n_eval_episodes=n_eval_episodes, deterministic=True
        )
    finally:
        eval_env.close()
    print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
    return mean_reward, std_reward

In [7]:
# Directory passed as `tensorboard_log` to the models constructed below.
log_dir = "./logs2/"

In [8]:
# Draw one seed and reuse it for the model constructors in the cells below.
seed = get_seed()

In [None]:
# A2C agent with an MLP policy on the shared MountainCar environment,
# logging to TensorBoard under `log_dir` and using the shared `seed`.
model_A2C_Mlp = A2C(
    policy="MlpPolicy",
    env=env,
    tensorboard_log=log_dir,
    verbose=0,
    seed=seed,
    device="cuda",
)
# Train, save and evaluate; the postfix tags the TensorBoard run and the saved file.
train(model_A2C_Mlp, seed, "A2C-Mlp")

2022-10-12 21:04:24.546188


```
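# X # Not runnable here: DDPG supports only continuous (Box) action spaces, while MountainCar-v0 has a discrete action space.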
model_DDPG = DDPG(
    policy = 'MlpPolicy',
    env = env,    
    tensorboard_log=log_dir,    
    verbose=0,
    seed=seed,
    device='cuda'
)
train(model_DDPG, seed, 'DDPG-Mlp')
```

In [None]:
# DQN agent with an MLP policy. `tensorboard_log`, `verbose` and `seed` are
# passed just as for the A2C and PPO models, so that the `tb_log_name` used by
# train() has a log directory to write to and the run uses the shared seed.
model_DQN = DQN(
    policy="MlpPolicy",
    env=env,
    learning_rate=1e-3,
    buffer_size=50000,
    exploration_fraction=0.1,
    exploration_final_eps=0.1,
    tensorboard_log=log_dir,
    verbose=0,
    seed=seed,
    # Disabled options carried over from the original cell:
    # param_noise=True,
    # policy_kwargs=dict(layers=[64])
)
# Train, save and evaluate; the postfix tags the TensorBoard run and the saved file.
train(model_DQN, seed, "DQN-Mlp")

```
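# X # Not runnable here: HER requires a goal-conditioned environment (observations with achieved/desired goals), which MountainCar-v0 is not.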
model_HER = HER(
    policy = 'MlpPolicy',
    env = env,    
    tensorboard_log=log_dir,    
    verbose=0,
    seed=seed,
    device='cuda'
)
train(model_HER, seed, 'HER-Mlp')
```

In [9]:
# PPO agent with an MLP policy on the shared MountainCar environment,
# logging to TensorBoard under `log_dir` and using the shared `seed`.
model_PPO_MLP = PPO(
    policy="MlpPolicy",
    env=env,
    tensorboard_log=log_dir,
    verbose=0,
    seed=seed,
    device="cuda",
)
# Train, save and evaluate; the postfix tags the TensorBoard run and the saved file.
train(model_PPO_MLP, seed, "PPO-Mlp")

2022-10-12 21:14:04.682616
2022-10-13 00:10:56.743365




mean_reward=-147.80 +/- 6.19354502688081


```
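# X # Not runnable here: SAC supports only continuous (Box) action spaces, while MountainCar-v0 has a discrete action space.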
model_SAC = SAC(
    policy = 'MlpPolicy',
    env = env,    
    tensorboard_log=log_dir,    
    verbose=0,
    seed=seed,
    device='cuda'
)
train(model_SAC, seed, 'SAC-Mlp')
```

```
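# X # Not runnable here: TD3 supports only continuous (Box) action spaces, while MountainCar-v0 has a discrete action space.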
model_TD3 = TD3(
    policy = 'MlpPolicy',
    env = env,    
    tensorboard_log=log_dir,    
    verbose=0,
    seed=seed,
    device='cuda'
)
train(model_TD3, seed, 'TD3-Mlp')
```

### Publish our trained model on the Hub

In [10]:
# Log in to the Hugging Face account used for uploading models to the Hub.
notebook_login()

Login successful
Your token has been saved to /home/alex/.huggingface/token


In [11]:
# Shell escape: set git's global credential helper to `store`.
# NOTE(review): the `store` helper keeps credentials in a plain-text file on
# disk -- confirm this is acceptable on the machine running the notebook.
!git config --global credential.helper store

In [12]:
import gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

In [14]:
# Settings used when publishing the trained agent to the Hugging Face Hub.

# Gym id of the environment the agent is evaluated on.
env_id = "MountainCar-v0"

# Name of the algorithm that produced the model being uploaded.
model_architecture = "PPO"

# Target model repository on the Hub, in the form {organization}/{repo_name}
# (for instance ThomasSimonini/ppo-LunarLander-v2). Change this to your own repo id.
repo_id = "format37/PPO-MountainCar-v0"

# Commit message attached to the upload, composed from the settings above.
commit_message = f"Upload {model_architecture} {env_id} trained agent"

# Evaluation environment: a DummyVecEnv wrapping one freshly made environment.
eval_env = DummyVecEnv([lambda: gym.make(env_id)])

In [15]:
# Name passed to package_to_hub as `model_name` for the uploaded model.
model_name = 'PPO-Mlp'

In [16]:
# Save and evaluate the agent, record a replay video, create a model card and
# push everything to the Hub (see the function's own log output below).
package_to_hub(
    # Our trained model.
    model=model_PPO_MLP,
    # The name of our trained model.
    model_name=model_name,
    # The model architecture we used: in our case PPO.
    model_architecture=model_architecture,
    # Name of the environment.
    env_id=env_id,
    # Evaluation environment.
    eval_env=eval_env,
    # Id of the model repository on the Hugging Face Hub
    # ({organization}/{repo_name}, for instance ThomasSimonini/ppo-LunarLander-v2).
    repo_id=repo_id,
    # Commit message for the upload.
    commit_message=commit_message,
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: If you encounter a bug, please open an issue and use
push_to_hub instead.[0m


Cloning https://huggingface.co/format37/PPO-MountainCar-v0 into local empty directory.


Saving video to /home/alex/projects/deep-rl-class/unit1/-step-0-to-step-1000.mp4


ffmpeg version 4.4.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 9.4.0 (GCC)
  configuration: --prefix=/home/alex/anaconda3/envs/pycuda --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1636205340875/_build_env/bin/x86_64-conda-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-demuxer=dash --enable-gnutls --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-vaapi --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/home/conda/feedstock_root/build_artifacts/ffmpeg_1636205340875/_build_env/bin/pkg-config
  libavutil      56. 70.100 / 56. 70.100
  libavcodec     58.134.100 / 58.134.100
  libavformat    58. 76.100 / 58. 76.100
  libavdevice    58. 13.100 / 58. 13.100
  libavfilter     7.110.100 /  7.110.100
  libavresample   4.  0

[38;5;4mℹ Pushing repo PPO-MountainCar-v0 to the Hugging Face Hub[0m


Upload file replay.mp4:  13%|#3        | 32.0k/242k [00:00<?, ?B/s]

Upload file PPO-Mlp.zip:  24%|##3       | 32.0k/134k [00:00<?, ?B/s]

Upload file PPO-Mlp/policy.optimizer.pth:  41%|####1     | 32.0k/77.4k [00:00<?, ?B/s]

Upload file PPO-Mlp/pytorch_variables.pth: 100%|##########| 431/431 [00:00<?, ?B/s]

Upload file PPO-Mlp/policy.pth:  80%|########  | 32.0k/39.8k [00:00<?, ?B/s]

remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/format37/PPO-MountainCar-v0
   0b67df1..f23e9d6  main -> main



[38;5;4mℹ Your model is pushed to the hub. You can view your model here:
https://huggingface.co/format37/PPO-MountainCar-v0[0m


'https://huggingface.co/format37/PPO-MountainCar-v0'