## Import Dependencies

In [1]:
from stable_baselines3 import PPO, DQN, A2C
import gymnasium as gym
from stable_baselines3.common.logger import configure
from stable_baselines3.common.evaluation import evaluate_policy

## Define the Environment Factory and Curriculum Stages

In [2]:
def make_lunar_env(params=None):
    """Build a discrete-action ``LunarLander-v3`` environment for one curriculum stage.

    Args:
        params: Optional mapping with physics overrides. The keys read are
            ``"gravity"``, ``"enable_wind"``, ``"wind_power"`` and
            ``"turbulence_power"``; any other key (for example ``"name"``)
            is ignored. ``None`` behaves like an empty mapping, so every
            setting falls back to the defaults used below.

    Returns:
        The object returned by ``gym.make`` for the requested configuration.
    """
    # Treat a missing mapping as "no overrides" instead of failing on `.get`.
    stage = params if params is not None else {}
    return gym.make(
        "LunarLander-v3",
        continuous=False,
        gravity=stage.get("gravity", -10.0),
        enable_wind=stage.get("enable_wind", False),
        wind_power=stage.get("wind_power", 0.0),
        turbulence_power=stage.get("turbulence_power", 0.0),
        render_mode=None,
    )

# Curriculum definition: one dict per stage, listed in training order.
# Each tuple is (name, gravity, enable_wind, wind_power, turbulence_power);
# every stage in this schedule is given 100_000 timesteps.
curriculum_stages = [
    {
        "name": stage_name,
        "gravity": gravity,
        "enable_wind": windy,
        "wind_power": wind,
        "turbulence_power": turbulence,
        "parcial_timesteps": 100_000,
    }
    for stage_name, gravity, windy, wind, turbulence in (
        ("easy", -5.0, False, 0.0, 0.0),
        ("intermediate", -10.0, False, 0.0, 0.0),
        ("hard_with_wind", -10.0, True, 5.0, 0.5),
        ("full_difficulty", -10.0, True, 15.0, 1.5),
    )
]

In [8]:
# Curriculum training (short schedule): a single PPO agent is trained on each
# entry of `curriculum_stages` in order, keeping its weights between stages.
SEED = 42  # fixed seed so a fresh-kernel rerun starts from the same state

tmp_path = "./results/ppo-curriculum-lunarlander-1"
new_logger = configure(tmp_path, ["stdout", "csv", "tensorboard"])

model = None

for stage_num, stage in enumerate(curriculum_stages, start=1):
    print(f"🔁 Treinando no estágio {stage_num}: {stage['name']}")
    env = make_lunar_env(stage)

    if model is None:
        model = PPO("MlpPolicy", env, verbose=1, seed=SEED)
        model.set_logger(new_logger)
    else:
        # Release the previous stage's environment before replacing it;
        # otherwise every finished stage leaves an open environment behind.
        model.get_env().close()
        model.set_env(env)
        # The seed given to PPO only reached the first environment, so seed
        # each later stage explicitly (distinct per stage).
        model.get_env().seed(SEED + stage_num)

    # reset_num_timesteps=False keeps the global step counter (and the log
    # curves) continuous across stages instead of restarting at zero.
    model.learn(total_timesteps=stage.get("parcial_timesteps", 100_000),
                reset_num_timesteps=False)

    print(f"🏆 Avaliando o modelo no estágio {stage_num}: {stage['name']}")
    # Evaluate on the environment held by the model: it is the wrapped
    # (Monitor + vectorised) version of `env`, which avoids the
    # "not wrapped with a Monitor" warning raised for the raw environment.
    mean_reward, std_reward = evaluate_policy(
        model, model.get_env(), n_eval_episodes=10
    )
    print(f"🏆 Avaliação média: {mean_reward:.2f} +/- {std_reward:.2f}")
    model.save(f"./saved_models/ppo_lunar_stage_{stage_num}")

# The last stage's environment is intentionally left attached to `model` so
# the agent can still be evaluated or trained further after this cell.

Logging to ./results/ppo-curriculum-lunarlander-1
🔁 Treinando no estágio 1: easy
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 164      |
|    ep_rew_mean     | -268     |
| time/              |          |
|    fps             | 409      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 154          |
|    ep_rew_mean          | -256         |
| time/                   |              |
|    fps                  | 304          |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0085056145 |
|    clip_f

## Long Training Run (500k Timesteps per Stage)

In [3]:

# Long curriculum schedule: same four stages in the same order, each given
# 500_000 timesteps. Note that this rebinds `curriculum_stages`.
curriculum_stages = [
    {"name": label, **physics, "parcial_timesteps": 500_000}
    for label, physics in (
        (
            "easy",
            {"gravity": -5.0, "enable_wind": False,
             "wind_power": 0.0, "turbulence_power": 0.0},
        ),
        (
            "intermediate",
            {"gravity": -10.0, "enable_wind": False,
             "wind_power": 0.0, "turbulence_power": 0.0},
        ),
        (
            "hard_with_wind",
            {"gravity": -10.0, "enable_wind": True,
             "wind_power": 5.0, "turbulence_power": 0.5},
        ),
        (
            "full_difficulty",
            {"gravity": -10.0, "enable_wind": True,
             "wind_power": 15.0, "turbulence_power": 1.5},
        ),
    )
]

In [None]:
# Curriculum training (long schedule): a single PPO agent is trained on each
# entry of `curriculum_stages` in order, keeping its weights between stages.
SEED = 42  # fixed seed so a fresh-kernel rerun starts from the same state

tmp_path = "./results/ppo-curriculum-lunarlander-long"
new_logger = configure(tmp_path, ["stdout", "csv", "tensorboard"])

model = None

for stage_num, stage in enumerate(curriculum_stages, start=1):
    print(f"🔁 Treinando no estágio {stage_num}: {stage['name']}")
    env = make_lunar_env(stage)

    if model is None:
        model = PPO("MlpPolicy", env, verbose=1, seed=SEED)
        model.set_logger(new_logger)
    else:
        # Release the previous stage's environment before replacing it;
        # otherwise every finished stage leaves an open environment behind.
        model.get_env().close()
        model.set_env(env)
        # The seed given to PPO only reached the first environment, so seed
        # each later stage explicitly (distinct per stage).
        model.get_env().seed(SEED + stage_num)

    # reset_num_timesteps=False keeps the global step counter (and the log
    # curves) continuous across stages instead of restarting at zero.
    model.learn(total_timesteps=stage.get("parcial_timesteps", 100_000),
                reset_num_timesteps=False)

    print(f"🏆 Avaliando o modelo no estágio {stage_num}: {stage['name']}")
    # Evaluate on the environment held by the model: it is the wrapped
    # (Monitor + vectorised) version of `env`, which avoids the
    # "not wrapped with a Monitor" warning raised for the raw environment.
    mean_reward, std_reward = evaluate_policy(
        model, model.get_env(), n_eval_episodes=10
    )
    print(f"🏆 Avaliação média: {mean_reward:.2f} +/- {std_reward:.2f}")
    model.save(f"./saved_models/ppo_lunar_long_stage_{stage_num}")

# The last stage's environment is intentionally left attached to `model` so
# the agent can still be evaluated or trained further after this cell.

Logging to ./results/ppo-curriculum-lunarlander-long
🔁 Treinando no estágio 1: easy
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 184      |
|    ep_rew_mean     | -302     |
| time/              |          |
|    fps             | 265      |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 174        |
|    ep_rew_mean          | -298       |
| time/                   |            |
|    fps                  | 225        |
|    iterations           | 2          |
|    time_elapsed         | 18         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.00794119 |
|    clip_fraction        | 0.0525     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.38      |
|    explained_variance   | 0.000792   |
|    learning_rate        | 0.0003     |
|   

In [None]:
# Save the agent under a stage-independent name. This relies on `model`
# having been created by the training cell above, so run that cell first.
final_model_path = "./saved_models/ppo_curriculum_lunar_long"
model.save(final_model_path)