In [1]:
import sys
sys.path.insert(0, "../")

from CustomGymEnv import CustomGymEnv
import torch
from grid2op.Parameters import Parameters
from examples.ppo_stable_baselines.B_train_agent import CustomReward
from lightsim2grid import LightSimBackend
from grid2op.Chronics import MultifolderWithCache
import numpy as np

In [2]:
# Copyright (c) 2020-2022 RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions.

import warnings
import copy
import os
import grid2op
import json

from grid2op.gym_compat import BoxGymActSpace, BoxGymObsSpace, GymEnv

from l2rpn_baselines.PPO_SB3.utils import SB3Agent

# stable-baselines3 is an optional dependency: record whether it could be
# imported so that `get_agent` can raise a clear ImportError later on.
try:
    from stable_baselines3.common.callbacks import CheckpointCallback
    from stable_baselines3 import PPO
    from stable_baselines3.ppo import MlpPolicy
    _CAN_USE_STABLE_BASELINE = True
except ImportError:
    _CAN_USE_STABLE_BASELINE = False
    # Placeholder so that the name `MlpPolicy` still exists when the import
    # fails (it is used as a default argument value of `get_agent`).
    class MlpPolicy(object):
        """
        Do not use, this class is a template when stable baselines3 is not installed.
        
        It represents `from stable_baselines3.ppo import MlpPolicy`
        """
    
from l2rpn_baselines.PPO_SB3.utils import (default_obs_attr_to_keep, 
                                           default_act_attr_to_keep,
                                           remove_non_usable_attr,
                                           save_used_attribute)


def _is_scaled_elsewhere(attr_nm, space_kwargs, scaling_keys):
    """Return True if `attr_nm` is listed under any of `scaling_keys` in `space_kwargs`."""
    return any(key in space_kwargs and attr_nm in space_kwargs[key]
               for key in scaling_keys)


def get_agent(env,
              name="PPO_SB3",
              iterations=1,
              save_path=None,
              load_path=None,
              net_arch=None,
              logs_dir=None,
              learning_rate=3e-4,
              save_every_xxx_steps=None,
              model_policy=MlpPolicy,
              obs_attr_to_keep=copy.deepcopy(default_obs_attr_to_keep),
              obs_space_kwargs=None,
              act_attr_to_keep=copy.deepcopy(default_act_attr_to_keep),
              act_space_kwargs=None,
              policy_kwargs=None,
              normalize_obs=False,
              normalize_act=False,
              gymenv_class=GymEnv,
              gymenv_kwargs=None,
              verbose=True,
              seed=None,  # TODO
              eval_env=None,  # TODO
              **kwargs):
    """Build an `SB3Agent` and the matching gym environment, without training.

    The grid2op environment is wrapped with `gymenv_class`, its observation and
    action spaces are replaced by `BoxGymObsSpace` / `BoxGymActSpace` restricted
    to the requested attributes, and an `SB3Agent` is created either from fresh
    network arguments or from a saved model.

    Parameters
    ----------
    env:
        The grid2op environment to wrap.
    name: str
        Name of the agent; used as sub-directory of `save_path`, `logs_dir`
        and `load_path`.
    iterations: int
        Accepted for signature compatibility; not used by this function.
    save_path: str or None
        Where the kept attributes and the normalization marker files are
        written. Nothing is written by this function itself when it is None.
    load_path: str or None
        If not None, the agent is loaded from `os.path.join(load_path, name)`
        and `net_arch`, `policy_kwargs`, `logs_dir`, `learning_rate` and
        `kwargs` are ignored.
    net_arch:
        If not None, stored under the "net_arch" key of the policy kwargs.
    logs_dir: str or None
        Root directory of the tensorboard logs; created if missing.
    learning_rate: float
        Forwarded as "learning_rate" in the network arguments.
    save_every_xxx_steps: int or None
        If set together with `save_path`, a `CheckpointCallback` is built.
        NOTE: the callback is not returned nor attached to anything here.
    model_policy:
        Forwarded as "policy" in the network arguments.
    obs_attr_to_keep, act_attr_to_keep: list of str
        Attributes kept in the gym observation / action space.
    obs_space_kwargs, act_space_kwargs: dict or None
        Extra keyword arguments for `BoxGymObsSpace` / `BoxGymActSpace`.
    policy_kwargs: dict or None
        Forwarded as "policy_kwargs"; the dict given by the caller is not
        modified.
    normalize_obs, normalize_act: bool
        Whether to call `normalize_attr` on every kept attribute that is not
        already scaled through the corresponding `*_space_kwargs`.
    gymenv_class, gymenv_kwargs:
        Class (and its keyword arguments) used to wrap `env`.
    verbose:
        Forwarded as "verbose" in the network arguments.
    seed, eval_env:
        Accepted for signature compatibility; not used by this function.
    **kwargs:
        Added to the network arguments (they take precedence over the entries
        built here).

    Returns
    -------
    tuple
        `(agent, env_gym)`: the `SB3Agent` and the gym environment.

    Raises
    ------
    ImportError
        If stable baselines3 could not be imported.
    """
    if not _CAN_USE_STABLE_BASELINE:
        raise ImportError("Cannot use this function as stable baselines3 is not installed")

    # keep only usable attributes (if default is used)
    act_attr_to_keep = remove_non_usable_attr(env, act_attr_to_keep)

    # save the attributes kept
    my_path = None  # only meaningful when `save_path` is provided
    if save_path is not None:
        my_path = os.path.join(save_path, name)
    save_used_attribute(save_path, name, obs_attr_to_keep, act_attr_to_keep)

    # define the gym environment from the grid2op env
    if gymenv_kwargs is None:
        gymenv_kwargs = {}
    if obs_space_kwargs is None:
        obs_space_kwargs = {}
    if act_space_kwargs is None:
        act_space_kwargs = {}

    env_gym = gymenv_class(env, **gymenv_kwargs)
    # the default spaces are closed before being replaced by the Box ones
    env_gym.observation_space.close()
    env_gym.observation_space = BoxGymObsSpace(env.observation_space,
                                               attr_to_keep=obs_attr_to_keep,
                                               **obs_space_kwargs)
    env_gym.action_space.close()
    env_gym.action_space = BoxGymActSpace(env.action_space,
                                          attr_to_keep=act_attr_to_keep,
                                          **act_space_kwargs)

    if normalize_act:
        if save_path is not None:
            # marker file recording that the action space has been normalized
            with open(os.path.join(my_path, ".normalize_act"), encoding="utf-8",
                      mode="w") as f:
                f.write("I have encoded the action space !\n DO NOT MODIFY !")
        for attr_nm in act_attr_to_keep:
            if _is_scaled_elsewhere(attr_nm, act_space_kwargs, ("multiply", "add")):
                # attribute is scaled elsewhere
                continue
            env_gym.action_space.normalize_attr(attr_nm)

    if normalize_obs:
        if save_path is not None:
            # marker file recording that the observation space has been normalized
            with open(os.path.join(my_path, ".normalize_obs"), encoding="utf-8",
                      mode="w") as f:
                f.write("I have encoded the observation space !\n DO NOT MODIFY !")
        for attr_nm in obs_attr_to_keep:
            if _is_scaled_elsewhere(attr_nm, obs_space_kwargs, ("divide", "subtract")):
                # attribute is scaled elsewhere
                continue
            env_gym.observation_space.normalize_attr(attr_nm)

    # Save a checkpoint every "save_every_xxx_steps" steps
    # NOTE(review): `checkpoint_callback` is never used after this point, since
    # this function does not train the agent.
    checkpoint_callback = None
    if save_every_xxx_steps is not None:
        if save_path is None:
            warnings.warn("save_every_xxx_steps is set, but no path are "
                          "set to save the model (save_path is None). No model "
                          "will be saved.")
        else:
            checkpoint_callback = CheckpointCallback(save_freq=save_every_xxx_steps,
                                                     save_path=my_path,
                                                     name_prefix=name)

    # load a saved agent if requested
    if load_path is not None:
        agent = SB3Agent(env.action_space,
                         env_gym.action_space,
                         env_gym.observation_space,
                         nn_path=os.path.join(load_path, name)
        )
        return agent, env_gym

    # define the policy
    # work on a copy so that the dictionary given by the caller is left untouched
    policy_kwargs = {} if policy_kwargs is None else dict(policy_kwargs)
    if net_arch is not None:
        policy_kwargs["net_arch"] = net_arch

    this_logs_dir = None
    if logs_dir is not None:
        # `makedirs` also creates missing parents and tolerates an existing directory
        os.makedirs(logs_dir, exist_ok=True)
        this_logs_dir = os.path.join(logs_dir, name)

    nn_kwargs = {
        "policy": model_policy,
        "env": env_gym,
        "verbose": verbose,
        "learning_rate": learning_rate,
        "tensorboard_log": this_logs_dir,
        "policy_kwargs": policy_kwargs,
        **kwargs
    }
    agent = SB3Agent(env.action_space,
                     env_gym.action_space,
                     env_gym.observation_space,
                     nn_kwargs=nn_kwargs,
    )
    return agent, env_gym


In [3]:
# Identification of the environment and of the agent
ENV_NAME = "l2rpn_wcci_2022_dev"
env_name_train = f"{ENV_NAME}_train"
save_path = "./agent"
name = "CustomGymEnv"
gymenv_class = CustomGymEnv

# Arguments later unpacked into `get_agent(env, ..., **train_args)`
device_name = "cuda" if torch.cuda.is_available() else "cpu"
train_args = {
    # Utility parameters PPO
    "logs_dir": "./logs",
    "save_path": save_path,
    "name": name,
    "verbose": 1,
    "gymenv_class": gymenv_class,
    "device": torch.device(device_name),
    # attributes of the observation given to the agent
    "obs_attr_to_keep": [
        "month", "day_of_week", "hour_of_day", "minute_of_hour",
        "gen_p", "load_p",
        "p_or", "rho", "timestep_overflow", "line_status",
        # dispatch part of the observation
        "actual_dispatch", "target_dispatch",
        # storage part of the observation
        "storage_charge", "storage_power",
        # curtailment part of the observation
        "curtailment", "curtailment_limit", "gen_p_before_curtail",
    ],
    # attributes of the action the agent can modify
    "act_attr_to_keep": ["set_storage", "curtail"],
    "iterations": 700_000,
    "learning_rate": 1e-4,
    "net_arch": [300, 300, 300],
    "gamma": 0.999,
    # keyword arguments given to `gymenv_class` when the gym env is built
    "gymenv_kwargs": {"safe_max_rho": 0.1},
    "normalize_act": True,
    "normalize_obs": True,
}

# a tenth of the iterations, capped at 100_000 steps
train_args["save_every_xxx_steps"] = min(train_args["iterations"] // 10, 100_000)

train_args["n_steps"] = 16
train_args["batch_size"] = 16

# grid2op parameters used to create the environment
p = Parameters()
p.LIMIT_INFEASIBLE_CURTAILMENT_STORAGE_ACTION = True

env = grid2op.make(
    ENV_NAME,
    reward_class=CustomReward,
    backend=LightSimBackend(),
    chronics_class=MultifolderWithCache,
    param=p,
)

In [4]:
# Extra keyword arguments for the Box observation space, read from JSON
with open("../preprocess_obs.json", mode="r", encoding="utf-8") as obs_cfg_file:
    obs_space_kwargs = json.load(obs_cfg_file)

# Extra keyword arguments for the Box action space, read from JSON
with open("../preprocess_act.json", mode="r", encoding="utf-8") as act_cfg_file:
    act_space_kwargs = json.load(act_cfg_file)

# Build the agent and the gym environment it acts on
agent, gym_env = get_agent(
    env,
    obs_space_kwargs=obs_space_kwargs,
    act_space_kwargs=act_space_kwargs,
    **train_args,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [5]:
# Load the archive of expert data from the working directory.
# SECURITY: `allow_pickle=True` lets numpy unpickle object arrays, which can
# execute arbitrary code -- only load an `expert_data.npz` you trust.
data = np.load("expert_data.npz", allow_pickle=True)

In [6]:
# First expert sample. Subscripting (instead of `.get`) raises a KeyError
# if an array is missing from the archive, rather than failing later with
# "'NoneType' object is not subscriptable".
act = data["expert_actions"][0]
obs = data["expert_observations"][0]
print(act)

2050.0
This action will:
	 - NOT change anything to the injections
	 - Modify the generators with redispatching in the following way:
	 	 - Redispatch "gen_7_2" of 0.31 MW
	 	 - Redispatch "gen_11_6" of 0.62 MW
	 	 - Redispatch "gen_14_8" of 0.02 MW
	 	 - Redispatch "gen_17_10" of -0.06 MW
	 	 - Redispatch "gen_25_13" of 0.02 MW
	 	 - Redispatch "gen_41_19" of 0.08 MW
	 	 - Redispatch "gen_53_25" of 0.32 MW
	 	 - Redispatch "gen_59_31" of -0.14 MW
	 	 - Redispatch "gen_60_32" of -0.17 MW
	 	 - Redispatch "gen_64_35" of -0.01 MW
	 	 - Redispatch "gen_68_37" of 0.02 MW
	 	 - Redispatch "gen_69_38" of 0.01 MW
	 	 - Redispatch "gen_76_41" of 0.00 MW
	 	 - Redispatch "gen_76_42" of 0.01 MW
	 	 - Redispatch "gen_82_45" of 0.01 MW
	 	 - Redispatch "gen_91_51" of 0.01 MW
	 	 - Redispatch "gen_99_53" of 0.01 MW
	 	 - Redispatch "gen_103_55" of 0.01 MW
	 	 - Redispatch "gen_110_58" of 0.01 MW
	 	 - Redispatch "gen_112_60" of -0.10 MW
	 - Modify the storage units in the following way:
	 	 - Ask u

In [7]:
# Bounds registered by the gym action space for the "curtail" attribute
curtail_bounds = gym_env.action_space.dict_properties["curtail"]
low, high = curtail_bounds[0], curtail_bounds[1]

# Curtailment of the expert action, restricted to the renewable generators
curtail = act.curtail.copy()[env.gen_renewable]

# Min-max scaling, i.e. the inverse of `value = scaled * (high - low) + low`.
# The former expression `curtail / (high - low) + low` only gives the same
# result when `low` is 0.
# NOTE(review): assumes `dict_properties` holds the bounds the expert values
# are expressed in -- confirm against the action space implementation.
curtail = (curtail - low) / (high - low)

In [8]:
# Bounds registered by the gym action space for the "set_storage" attribute
# (not used in the conversion below)
storage_bounds = gym_env.action_space.dict_properties["set_storage"]
low, high = storage_bounds[0], storage_bounds[1]

# Divide the expert storage setpoints by the "multiply" factor configured
# for "set_storage" in the action space kwargs
storage_scale = act_space_kwargs["multiply"]["set_storage"]
storage = act.storage_p.copy() / storage_scale

In [9]:
# Assemble the gym action: curtailment values first, then storage values,
# and decode it with the gym action space to inspect the result
gym_action = np.concatenate([curtail, storage])
decoded_action = gym_env.action_space.from_gym(gym_action)
print(decoded_action)

This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - Modify the storage units in the following way:
	 	 - Ask unit "storage_22_0" to absorb 1.52 MW (setpoint: 1.52 MW)
	 	 - Ask unit "storage_41_1" to produce 7.75 MW (setpoint: -7.75 MW)
	 	 - Ask unit "storage_44_2" to produce 9.65 MW (setpoint: -9.65 MW)
	 	 - Ask unit "storage_58_3" to produce 9.57 MW (setpoint: -9.57 MW)
	 	 - Ask unit "storage_76_4" to produce 0.89 MW (setpoint: -0.89 MW)
	 	 - Ask unit "storage_95_5" to produce 0.75 MW (setpoint: -0.75 MW)
	 	 - Ask unit "storage_112_6" to absorb 10.20 MW (setpoint: 10.20 MW)
	 - Perform the following curtailment:
	 	 - Limit unit "gen_18_11" to 0.0% of its Pmax (setpoint: 0.000)
	 	 - Limit unit "gen_24_12" to 17.2% of its Pmax (setpoint: 0.172)
	 	 - Limit unit "gen_26_14" to 14.6% of its Pmax (setpoint: 0.146)
	 	 - Limit unit "gen_26_15" to 18.9% of its Pmax (setpoint: 0.189)
	 	 - Limit unit "gen_61_33" to 3.5% of its Pmax 

In [14]:
# Get an observation object from the environment, then overwrite its content
# with the expert observation vector
g2op_obs = env.reset()
g2op_obs.from_vect(obs)

max_rho = g2op_obs.rho.max()
print(f"max_rho: {max_rho:.4f}")

# Displayed as the cell output: the observation converted by the gym space
gym_env.observation_space.to_gym(g2op_obs)

max_rho: 0.9736


array([0.5       , 0.5       , 0.50000197, ..., 0.5       , 0.5       ,
       0.5       ], dtype=float32)

In [11]:
# Display the policy object of the agent's model (its repr is the cell output)
agent.nn_model.policy

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential(
      (0): Linear(in_features=1225, out_features=300, bias=True)
      (1): Tanh()
      (2): Linear(in_features=300, out_features=300, bias=True)
      (3): Tanh()
      (4): Linear(in_features=300, out_features=300, bias=True)
      (5): Tanh()
    )
    (policy_net): Sequential()
    (value_net): Sequential()
  )
  (action_net): Linear(in_features=300, out_features=49, bias=True)
  (value_net): Linear(in_features=300, out_features=1, bias=True)
)