In [5]:
import gymnasium as gym
from evaluation import evaluate_policy

# Environment shared by `random_policy` (for action sampling) and the
# `evaluate_policy` call further down in this cell.
env = gym.make("LunarLander-v3")

def random_policy(obs):
    """Return an action sampled from the environment's action space.

    Args:
        obs: Current observation. Accepted only so the function matches the
            policy signature expected by the evaluator; it is never read.

    Returns:
        Whatever `env.action_space.sample()` produces for the notebook-level
        `env`.
    """
    sampled_action = env.action_space.sample()
    return sampled_action

# Evaluate the random baseline and always release the environment afterwards,
# even if the evaluation raises part-way through.
try:
    # `env.action_space.sample()` draws from the space's own RNG, which
    # `env.reset(seed=...)` does not seed. Seed it explicitly so the random
    # baseline is repeatable across kernel restarts.
    # NOTE(review): harmless if `evaluate_policy` also seeds the action space
    # internally -- confirm against its implementation.
    env.action_space.seed(0)
    metrics_random = evaluate_policy(env, random_policy, num_episodes=100, seed=0)
finally:
    env.close()

# Report every metric returned by the evaluator, one per line.
print("Random policy metrics:")
for k, v in metrics_random.items():
    print(f"{k}: {v}")


Random policy metrics:
num_episodes: 100
mean_return: -207.59259033203125
std_return: 113.52462005615234
min_return: -557.0078735351562
max_return: -1.1881760358810425
solved_rate: 0.0
success_rate: 0.0
crash_rate: 1.0
timeout_rate: 0.0
other_terminal_rate: 0.0
mean_episode_length: 91.57
mean_main_engine_usage: 23.2
mean_side_engine_usage: 45.08
num_success_episodes: 0
mean_final_abs_x_success: None
mean_final_abs_vy_success: None
mean_final_abs_angle_success: None


In [6]:
import gymnasium as gym
from gymnasium.envs.box2d.lunar_lander import heuristic
from evaluation import evaluate_policy

# This cell's own LunarLander-v3 instance; it is passed to both `heuristic`
# (inside `heuristic_policy`) and `evaluate_policy` below.
env = gym.make("LunarLander-v3")

def heuristic_policy(obs):
    """Choose an action with Gymnasium's built-in LunarLander heuristic.

    Args:
        obs: Current observation, forwarded unchanged to `heuristic`.

    Returns:
        The action returned by `heuristic(env, obs)` for the notebook-level
        `env`.
    """
    chosen_action = heuristic(env, obs)
    return chosen_action

# Evaluate the heuristic controller and always release the environment
# afterwards, even if the evaluation raises part-way through.
try:
    metrics_heuristic = evaluate_policy(env, heuristic_policy, num_episodes=100, seed=0)
finally:
    env.close()

# Report every metric returned by the evaluator, one per line.
print("Heuristic policy metrics:")
for k, v in metrics_heuristic.items():
    print(f"{k}: {v}")


Heuristic policy metrics:
num_episodes: 100
mean_return: 240.22869873046875
std_return: 97.566650390625
min_return: -218.28501892089844
max_return: 314.9457702636719
solved_rate: 0.89
success_rate: 0.93
crash_rate: 0.05
timeout_rate: 0.02
other_terminal_rate: 0.0
mean_episode_length: 252.98
mean_main_engine_usage: 15.04
mean_side_engine_usage: 107.14
num_success_episodes: 93
mean_final_abs_x_success: 0.11925307661294937
mean_final_abs_vy_success: 0.0
mean_final_abs_angle_success: 0.04345544055104256
