In [2]:
## TRAINING FILE

import gymnasium as gym
import numpy as np
import os

from stable_baselines3 import A2C, PPO, TD3 # these are the algorithms (models) we can use
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecMonitor

from Callbacks import SaveOnBestTrainingRewardCallback


env_name = "BipedalWalker-v3"
modelName = "PPO_Bipedal_1"

# One seed shared by the environments and the agent, so that
# Restart Kernel -> Run All repeats the same run as far as the
# underlying libraries allow. Change it to sample a different run.
seed = 0


###   TRAINING UTILS  ###
# Directory that receives the training logs (created if missing).
log_dir = "tmp/"
os.makedirs(log_dir, exist_ok=True)

# Path prefix handed to VecMonitor for the per-episode statistics file.
# NOTE(review): SB3's ResultsWriter appears to append ".monitor.csv" to a
# prefix like this, so the file on disk is presumably
# tmp/PPO_Bipedal_1_.monitor.csv rather than tmp/PPO_Bipedal_1.csv -- confirm.
results_filename = log_dir + modelName + "_"
# Callback from the local Callbacks module; per its name it keeps the best
# model seen during training. It is given the log directory and the model
# name, and check_freq=100 sets how often it runs its check.
callback = SaveOnBestTrainingRewardCallback(check_freq=100, log_dir=log_dir, file_name=modelName)




### ENVIRONMENT ###
# Create 16 copies of the environment, seeded from `seed`, then wrap the
# vectorized env in a VecMonitor that writes episode results to
# `results_filename`.
# NOTE(review): make_vec_env seems to wrap each sub-environment in a Monitor
# already, so VecMonitor may emit a "already wrapped" warning -- confirm.

vec_env = make_vec_env(env_name, n_envs=16, seed=seed)
vec_env = VecMonitor(vec_env, results_filename)


### MAKE THE MODEL  ###
# PPO agent with an MLP policy. The hyperparameters are unchanged from the
# previous version of this cell; only the seed is new.
model = PPO('MlpPolicy', vec_env, verbose=0,
            n_steps = 2048,
            batch_size = 64,
            gae_lambda= 0.95,
            gamma= 0.999,
            n_epochs= 10,
            ent_coef= 0.0,
            learning_rate= 3e-4,
            clip_range= 0.18,
            seed= seed,
        )


### TRAINING ###

# Written in scientific notation for readability; learn() is given an
# integer, hence the cast below.
timesteps = 5e6
model.learn(total_timesteps=int(timesteps), callback=callback)




AttributeError: module 'gym.envs.box2d' has no attribute 'BipedalWalker'

Hyperparameter grid search for PPO


In [None]:
from sklearn.model_selection import GridSearchCV

# ... (previous code) ...

# Manual grid search over PPO hyperparameters.
#
# scikit-learn's GridSearchCV cannot be used here: it expects an estimator
# exposing fit(X, y) / get_params() / set_params() plus a dataset to
# cross-validate on, and a Stable-Baselines3 PPO agent provides none of these.
# Instead, every combination in the grid is trained for a fixed budget and
# scored by its mean evaluation reward.
#
# Relies on `env_name`, `make_vec_env` and `PPO` from the training cell.
# The trained `model` from that cell is deliberately NOT overwritten.
from itertools import product

from stable_baselines3.common.evaluation import evaluate_policy

# Define the parameter grid
param_grid = {
    'learning_rate': [1e-4, 3e-4, 5e-4],
    'n_steps': [1024, 2048, 4096],
    'clip_range': [0.1, 0.2, 0.3]
}

# Training budget per candidate and number of episodes used to score it.
# Both are tunable: larger values give a more reliable ranking but the total
# cost grows with the 27 combinations in the grid.
search_timesteps = 200_000
n_eval_episodes = 10

# One (mean_reward, std_reward, params) entry per evaluated combination.
search_results = []

for candidate_values in product(*param_grid.values()):
    candidate_params = dict(zip(param_grid.keys(), candidate_values))

    # Fresh environments per candidate, so the search does not write into
    # the monitored `vec_env` used for the main training run.
    train_env = make_vec_env(env_name, n_envs=16)
    eval_env = make_vec_env(env_name, n_envs=1)
    try:
        candidate_model = PPO('MlpPolicy', train_env, verbose=0, **candidate_params)
        candidate_model.learn(total_timesteps=search_timesteps)
        mean_reward, std_reward = evaluate_policy(
            candidate_model, eval_env, n_eval_episodes=n_eval_episodes
        )
    finally:
        # Always release the environments, even if training fails.
        train_env.close()
        eval_env.close()

    search_results.append((mean_reward, std_reward, candidate_params))

# Pick the combination with the highest mean evaluation reward.
best_mean_reward, best_std_reward, best_params = max(
    search_results, key=lambda result: result[0]
)

# Access the best parameters
print("Best parameters found:", best_params)
