Install stable-baselines3 for training algorithms

In [None]:
!pip install stable_baselines3[extra]

Set-up code

In [None]:
!apt-get install -y \
    libgl1-mesa-dev \
    libgl1-mesa-glx \
    libglew-dev \
    libosmesa6-dev \
    software-properties-common

!apt-get install -y patchelf

!pip install gym
!pip install free-mujoco-py

Unzip custom-hopper environment and utils

In [None]:
!unzip custom_hopper.zip

Import libraries

In [10]:
import gym
from env.custom_hopper import *
from stable_baselines3 import PPO
from itertools import product
import numpy as np
from stable_baselines3.common.evaluation import evaluate_policy
from operator import itemgetter
from tqdm import tqdm

Hyperparameter intervals

In [8]:
# Candidate values for the PPO grid search: three points per tuned
# hyperparameter. The integer grids are linearly spaced and truncated to int;
# the learning rate is log-spaced between 1e-5 and 1e-3.
n_steps_ls = np.linspace(start=1000, stop=5000, num=3, dtype=int)
batch_size_ls = np.linspace(start=10, stop=200, num=3, dtype=int)
n_epochs_ls = np.linspace(start=3, stop=30, num=3, dtype=int)
learning_rate_ls = np.logspace(start=-5, stop=-3, num=3)

# The entropy coefficient is held fixed rather than searched over.
# A log-spaced grid such as np.logspace(-3, -2, num=3) could be swept instead.
ent_coef = 1e-3

Training

In [None]:
# Grid search: train one PPO agent per hyperparameter combination on the
# selected CustomHopper environment and record its mean evaluation reward.
train_env = 'source'
env = gym.make(f'CustomHopper-{train_env}-v0')

# Materialise the Cartesian product so that tqdm knows how many
# configurations there are; wrapping the bare `product(...)` generator gives a
# progress bar without a total or an ETA.
param_grid = list(product(n_steps_ls, batch_size_ls, n_epochs_ls, learning_rate_ls))

# Each entry is (trained model, hyperparameter tuple, mean evaluation reward).
model_list = []
for n_steps, batch_size, n_epochs, learning_rate in tqdm(param_grid, desc='PPO grid search'):
    # Convert NumPy scalars to built-in types so that the values handed to PPO
    # and later written to the report are plain Python numbers.
    n_steps, batch_size, n_epochs = int(n_steps), int(batch_size), int(n_epochs)
    learning_rate = float(learning_rate)
    params = (n_steps, batch_size, n_epochs, learning_rate, ent_coef)

    model = PPO(
        "MlpPolicy",
        env=env,
        verbose=1,
        n_steps=n_steps,
        batch_size=batch_size,
        n_epochs=n_epochs,
        learning_rate=learning_rate,
        ent_coef=ent_coef,
    )
    model = model.learn(total_timesteps=100000)

    # The same environment instance is shared by training and evaluation;
    # reset it before and after the evaluation roll-outs.
    env.reset()
    mean, _std = evaluate_policy(model=model, n_eval_episodes=10, env=env)
    env.reset()

    model_list.append((model, params, mean))

0it [00:00, ?it/s]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 19.1     |
|    ep_rew_mean     | 15.5     |
| time/              |          |
|    fps             | 232      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 1000     |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 21            |
|    ep_rew_mean          | 17.6          |
| time/                   |               |
|    fps                  | 279           |
|    iterations           | 2             |
|    time_elapsed         | 7             |
|    total_timesteps      | 2000          |
| train/                  |               |
|    approx_kl            | 0.00012596129 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2      

1it [04:44, 284.95s/it]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 18.3     |
|    ep_rew_mean     | 14.5     |
| time/              |          |
|    fps             | 764      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20.9        |
|    ep_rew_mean          | 17.3        |
| time/                   |             |
|    fps                  | 499         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 2000        |
| train/                  |             |
|    approx_kl            | 0.006501311 |
|    clip_fraction        | 0.0483      |
|    clip_range           | 0.2         |
|    entropy_loss  

2it [09:20, 279.26s/it]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 23.3     |
|    ep_rew_mean     | 20.3     |
| time/              |          |
|    fps             | 745      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 24          |
|    ep_rew_mean          | 21.2        |
| time/                   |             |
|    fps                  | 493         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 2000        |
| train/                  |             |
|    approx_kl            | 0.014744407 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss  

3it [13:57, 278.17s/it]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.8     |
|    ep_rew_mean     | 22.7     |
| time/              |          |
|    fps             | 744      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 22.5        |
|    ep_rew_mean          | 19.7        |
| time/                   |             |
|    fps                  | 194         |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 2000        |
| train/                  |             |
|    approx_kl            | 0.003633556 |
|    clip_fraction        | 0.00588     |
|    clip_range           | 0.2         |
|    entropy_loss  

4it [29:06, 527.31s/it]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 16.6     |
|    ep_rew_mean     | 12.9     |
| time/              |          |
|    fps             | 743      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20          |
|    ep_rew_mean          | 19.6        |
| time/                   |             |
|    fps                  | 196         |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 2000        |
| train/                  |             |
|    approx_kl            | 0.014674838 |
|    clip_fraction        | 0.162       |
|    clip_range           | 0.2         |
|    entropy_loss  

5it [43:53, 656.94s/it]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 16.9     |
|    ep_rew_mean     | 11.7     |
| time/              |          |
|    fps             | 747      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20.3        |
|    ep_rew_mean          | 17.4        |
| time/                   |             |
|    fps                  | 199         |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 2000        |
| train/                  |             |
|    approx_kl            | 0.018055385 |
|    clip_fraction        | 0.285       |
|    clip_range           | 0.2         |
|    entropy_loss  

6it [58:35, 733.51s/it]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 18       |
|    ep_rew_mean     | 12.7     |
| time/              |          |
|    fps             | 763      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 21.5        |
|    ep_rew_mean          | 18.8        |
| time/                   |             |
|    fps                  | 122         |
|    iterations           | 2           |
|    time_elapsed         | 16          |
|    total_timesteps      | 2000        |
| train/                  |             |
|    approx_kl            | 0.009727248 |
|    clip_fraction        | 0.0709      |
|    clip_range           | 0.2         |
|    entropy_loss  

7it [1:24:11, 996.04s/it]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 17.4     |
|    ep_rew_mean     | 12.3     |
| time/              |          |
|    fps             | 754      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20.8        |
|    ep_rew_mean          | 17.6        |
| time/                   |             |
|    fps                  | 120         |
|    iterations           | 2           |
|    time_elapsed         | 16          |
|    total_timesteps      | 2000        |
| train/                  |             |
|    approx_kl            | 0.014246714 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.2         |
|    entropy_loss  

8it [1:49:54, 1169.94s/it]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 17.7     |
|    ep_rew_mean     | 12.8     |
| time/              |          |
|    fps             | 732      |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 19.5        |
|    ep_rew_mean          | 15.6        |
| time/                   |             |
|    fps                  | 119         |
|    iterations           | 2           |
|    time_elapsed         | 16          |
|    total_timesteps      | 2000        |
| train/                  |             |
|    approx_kl            | 0.023884399 |
|    clip_fraction        | 0.256       |
|    clip_range           | 0.2         |
|    entropy_loss  

In [None]:
# Rank the trained models by mean evaluation reward in ascending order:
# Config0 is the worst configuration and the highest index is the best.
model_list_sorted = sorted(model_list, key=itemgetter(2))

# Save every model and write a one-line summary per configuration.
with open('Configurations.txt', 'w') as f:
    # Iterate over the models that were actually trained. The previous
    # hard-coded range(3**5) assumed five tuned hyperparameters, but only four
    # are swept (3**4 = 81 models), so it raised IndexError at index 81.
    for i, (model, params, mean) in enumerate(model_list_sorted):
        model.save(f'trained_models/ppo_tuning{i}')
        f.write(f"Config{i} - params: {params}; mean reward: {mean}\n")
# No explicit f.close(): the `with` block already closes the file.

