In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
import optuna

In [11]:
TIMESTEPS = 50000
N_TRIALS = 100

In [None]:
def optimize(trial):
    env = gym.make("LunarLander-v3")
    policy_kwargs = dict(
        net_arch=dict(
            pi=[trial.suggest_categorical("pi", [64, 128, 256])],
            vf=[trial.suggest_categorical("vf", [64, 128, 256])],
        )
    )
    model = PPO(
        "MlpPolicy",
        env,
        learning_rate=trial.suggest_float("lr", 1e-5, 1e-3, log=True),
        n_steps=trial.suggest_categorical("n_steps", [128, 256, 512]),
        gamma=trial.suggest_float("gamma", 0.95, 0.999),
        policy_kwargs=policy_kwargs,
        seed=42,
        device="cpu",
        verbose=0,
    )
    model.learn(total_timesteps=TIMESTEPS)
    mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=5)
    env.close()
    return mean_reward

In [None]:
study = optuna.create_study(direction="maximize")
study.optimize(optimize, n_trials=N_TRIALS)
print(study.best_params)

[I 2025-05-23 19:42:19,771] A new study created in memory with name: no-name-9baa44b6-da03-4c8d-bc3b-3b925e1f7e28
[I 2025-05-23 19:42:43,649] Trial 0 finished with value: -902.7352706920356 and parameters: {'pi': 64, 'vf': 256, 'lr': 1.2342785840828689e-05, 'n_steps': 128, 'gamma': 0.984813800930772}. Best is trial 0 with value: -902.7352706920356.
[I 2025-05-23 19:43:06,957] Trial 1 finished with value: -831.3144875274971 and parameters: {'pi': 128, 'vf': 256, 'lr': 4.544979774470949e-05, 'n_steps': 256, 'gamma': 0.9920176653389603}. Best is trial 1 with value: -831.3144875274971.
[I 2025-05-23 19:43:30,253] Trial 2 finished with value: -628.5406919717789 and parameters: {'pi': 256, 'vf': 64, 'lr': 1.2962525615979408e-05, 'n_steps': 256, 'gamma': 0.9510410132651619}. Best is trial 2 with value: -628.5406919717789.
[I 2025-05-23 19:43:53,611] Trial 3 finished with value: -3855.710998980375 and parameters: {'pi': 128, 'vf': 256, 'lr': 9.752902862713016e-05, 'n_steps': 256, 'gamma': 0.98

{'pi': 256, 'vf': 256, 'lr': 0.0007966338872579695, 'n_steps': 128, 'gamma': 0.9759267686819444}
