# PPO hyperparameter tuning

In [None]:
import optuna
from stable_baselines3.common.evaluation import evaluate_policy
from PPO.ppo_utils import create_agent, train
import gym
from env.custom_hopper import *

# Number of Optuna trials to run (passed to study.optimize as n_trials).
n_trials = 50
# Multiplied by mean_timestep in objective() to form the training budget
# that optimize_call() hands to train() as total_timestep.
n_episodes = 1000
# Per-episode factor in that product; presumably an assumed average episode
# length in timesteps — TODO confirm.
mean_timestep = 100
# Number of episodes evaluate_policy() averages over when scoring a trial.
n_eval_episodes = 1000

def optimize_call(clip_range, learning_rate, gamma, n_episodes, n_eval_episodes, env: str = "CustomHopper-source-v0"):
    """Train an agent with the given hyperparameters and return its mean evaluation reward.

    Args:
        clip_range: Forwarded to ``create_agent`` as ``clip_range``.
        learning_rate: Forwarded to ``create_agent`` as ``learning_rate``.
        gamma: Forwarded to ``create_agent`` as ``gamma``.
        n_episodes: Training budget forwarded to ``train`` as
            ``total_timestep``. NOTE(review): despite the name, this value is
            used as a timestep count, not an episode count — confirm against
            ``train``.
        n_eval_episodes: Number of episodes ``evaluate_policy`` runs.
        env: Gym environment id used to build the evaluation environment.

    Returns:
        The mean reward reported by ``evaluate_policy``.
    """
    # NOTE(review): this environment is only used for evaluation; it is not
    # passed to create_agent, which presumably builds its own training
    # environment — confirm that it matches `env`.
    train_env = gym.make(env)
    try:
        agent = create_agent(
            clip_range=clip_range,
            verbose=0,
            learning_rate=learning_rate,
            gamma=gamma,
        )
        train(agent, total_timestep=n_episodes)

        mean_reward, _ = evaluate_policy(agent, train_env, n_eval_episodes=n_eval_episodes)
    finally:
        # One environment is created per trial; close it even when training
        # or evaluation raises so environments do not pile up across the
        # whole study.
        train_env.close()
    return mean_reward


def objective(trial):
    """Optuna objective: sample hyperparameters from ``trial`` and score them.

    Samples ``clip_range`` (log scale), ``learning_rate`` and ``gamma`` (both
    on a stepped grid), then returns the value produced by ``optimize_call``
    for that configuration.
    """
    # Dict literals evaluate in insertion order, so the parameters are
    # sampled in the same sequence: clip_range, learning_rate, gamma.
    sampled = {
        "clip_range": trial.suggest_float("clip_range", 0.01, 0.3, log=True),
        "learning_rate": trial.suggest_float("learning_rate", 5e-4, 1e-3, step=1e-4),
        "gamma": trial.suggest_float("gamma", 0.99, 0.999, step=0.001),
    }

    # The training budget is the product of the two module-level settings.
    training_budget = n_episodes * mean_timestep

    return optimize_call(
        clip_range=sampled["clip_range"],
        learning_rate=sampled["learning_rate"],
        gamma=sampled["gamma"],
        n_episodes=training_budget,
        n_eval_episodes=n_eval_episodes,
    )



# Create an in-memory study that maximizes the value returned by objective(),
# then run n_trials trials with up to 4 running concurrently.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_jobs=4, n_trials=n_trials)

# Report the best hyperparameter set found and the reward it achieved.
print(f"Best parameters: {study.best_params}")
print(f"Best reward: {study.best_value}")

[I 2025-05-29 22:13:31,964] A new study created in memory with name: no-name-c590d939-6183-49c7-9683-6fac34239599
