# Agent evaluation

## Imports

In [92]:
from grid2op.Episode import EpisodeData
from pathlib import Path
import re
from tqdm import tqdm
import numpy as np
from pprint import pprint

## Configs

In [None]:
AGENTS = {
    "PPO",  # baseline agent
    "PPO_reward",  # baseline with improved reward
    "MaskablePPO",  # MaskablePPO (standard reward)
    "MaskablePPO_reward",  # MaskablePPO with improved reward
    # "GraphPPO",  # baseline with graph embeddings
    "GraphPPO_reward"  # baseline with graph embeddins and improved reward
    "MaskableGraphPPO",  # MaskablePPO with graph embeddings
    "MaskableGraphPPO_reward",  # MaskablePPO with graph embeddings and improved reward
}
RAW_DATA_FOLDER = "raw_zips"  # this folder gets ignored

EVALUATION_LOGS_DIR = "../presentation_data/eval_logs/"
EPISODE_FOLDER_REGEX = r"^\d{4}$"  # matches 0010, 0020, 0200, 4000...


## Data loading

In [94]:
agent_data = {}
for agent_dir in Path(EVALUATION_LOGS_DIR).iterdir():
    if agent_dir.is_dir():
        longest_matched_agent = ""
        for agent in AGENTS.difference(
            agent_data.keys()
        ):  # ignore already matched agents
            if agent in agent_dir.name and len(agent) > len(longest_matched_agent):
                longest_matched_agent = agent
        if len(longest_matched_agent) > 0:
            if longest_matched_agent in agent_data:
                raise ValueError(
                    f"Matched the same agent {longest_matched_agent} twice."
                )

            agent_data[longest_matched_agent] = {}
            print(
                f"Loading episode data for agent {longest_matched_agent} from {agent_dir}"
            )
            for episode_dir in tqdm(
                list(agent_dir.iterdir())
            ):  # list needed for tqdm to predict timing
                if re.match(EPISODE_FOLDER_REGEX, str(episode_dir.name)):
                    agent_data[longest_matched_agent][episode_dir.name] = (
                        EpisodeData.from_disk(str(agent_dir), episode_dir.name)
                    )
unmatched_agents = AGENTS.difference(agent_data.keys())
if len(unmatched_agents) > 0:
    print(f"Could not match some agents: {unmatched_agents}")

Loading episode data for agent MaskablePPO from ../presentation_data/eval_logs/MaskablePPO_env=l2rpn_case14_sandbox_iterations=10000_2025-09-14_20:37:50


100%|██████████| 20/20 [00:31<00:00,  1.56s/it]


Loading episode data for agent PPO_reward from ../presentation_data/eval_logs/PPO_reward_env=l2rpn_case14_sandbox_iterations=10000_2025-09-14_17:06:53


100%|██████████| 20/20 [00:59<00:00,  2.96s/it]


Loading episode data for agent PPO from ../presentation_data/eval_logs/GraphPPO_reward_env=l2rpn_case14_sandbox_iterations=10000_2025-09-15_20:54:44


100%|██████████| 20/20 [00:39<00:00,  1.96s/it]


Loading episode data for agent MaskablePPO_reward from ../presentation_data/eval_logs/MaskablePPO_reward_env=l2rpn_case14_sandbox_iterations=10000_2025-09-14_20:34:47


100%|██████████| 20/20 [01:01<00:00,  3.08s/it]

Could not match some agents: {'MaskableGraphPPO', 'MaskableGraphPPO_reward'}





## Agent statistics

In [95]:
agent_statistics = {}
for agent in agent_data:
    agent_statistics[agent] = {}
    agent_statistics[agent]["score"] = {}
    agent_statistics[agent]["timesteps_survived"] = {}
    agent_statistics[agent]["total_timesteps"] = {}
    for episode_name, episode_data in agent_data[agent].items():
        total_timesteps = len(episode_data.rewards)
        timesteps_survived = np.count_nonzero(~np.isnan(episode_data.rewards))
        agent_statistics[agent]["total_timesteps"][episode_name] = total_timesteps
        agent_statistics[agent]["timesteps_survived"][episode_name] = timesteps_survived
        agent_statistics[agent]["score"][episode_name] = (
            timesteps_survived / total_timesteps
        )

    scores = list(agent_statistics[agent]["score"].values())
    agent_statistics[agent]["mean_score"] = np.mean(scores)
    agent_statistics[agent]["median_score"] = np.median(scores)
    agent_statistics[agent]["min_score"] = {
        min(agent_statistics[agent]["score"], key=agent_statistics[agent]["score"].get),
        min(scores),
    }
    agent_statistics[agent]["max_score"] = {
        max(agent_statistics[agent]["score"], key=agent_statistics[agent]["score"].get),
        max(scores),
    }


In [96]:
# WARNING: here the score is equal due to the small evaluation set
# they do not play the same actions by but in the end they have the same effect
mean_scores = {}
for agent in agent_statistics:
    mean_scores[agent] = agent_statistics[agent]["mean_score"]
pprint(mean_scores)

# TODO: recover the data from colab

{'MaskablePPO': np.float64(0.12759641617063494),
 'MaskablePPO_reward': np.float64(0.2951776413690476),
 'PPO': np.float64(0.1697901165674603),
 'PPO_reward': np.float64(0.2951776413690476)}
