In [1]:
from gym import Env, spaces
from grid2op import make
from grid2op.Parameters import Parameters
from lightsim2grid import LightSimBackend
from stable_baselines3 import DQN

import numpy as np

In [2]:
# grid2op parameters shared through the module-level name `p`, which
# grid2opGymEnv.__init__ passes to `make(..., param=p)`.
p = Parameters()
# NOTE(review): presumably asks grid2op to limit infeasible curtailment/storage
# actions instead of failing on them — confirm against the grid2op
# Parameters documentation.
p.LIMIT_INFEASIBLE_CURTAILMENT_STORAGE_ACTION = True

In [3]:
class grid2opGymEnv(Env):
  """Gym wrapper exposing a grid2op environment through a discrete action set.

  Observations are the flat vector returned by `obs.to_vect()`; an action is
  an integer index into a list of grid2op actions rebuilt from vectors stored
  in an ``.npz`` archive.

  Parameters
  ----------
  env_name : str
    Name forwarded to `grid2op.make`.
  actions_path : str, optional
    Path to an ``.npz`` archive with an ``"actions"`` entry holding one
    action vector per row. Defaults to ``"top1000_actions.npz"``.
  param : grid2op Parameters, optional
    Parameters handed to `make`. When omitted, the notebook-level `p` is
    used, which keeps one-argument construction working as before.

  Raises
  ------
  ValueError
    If the archive contains no actions.
  """

  def __init__(self, env_name, actions_path="top1000_actions.npz", param=None):
    super(grid2opGymEnv, self).__init__()

    self.env = make(env_name,
                    backend=LightSimBackend(),
                    param=p if param is None else param)

    # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
    # objects, which can execute code. Only load archives you trust, and drop
    # the flag if the "actions" entry is a plain numeric array.
    # The context manager closes the archive's file handle once the array
    # has been read.
    with np.load(actions_path, allow_pickle=True) as archive:
      action_vectors = archive["actions"]

    self.all_actions = [self.env.action_space.from_vect(vect)
                        for vect in action_vectors]
    if not self.all_actions:
      raise ValueError("no actions found in {!r}".format(actions_path))

    # Size both spaces from what was actually built, so the wrapper stays
    # valid for other environments or action files.
    self.observation_space = spaces.Box(low=-np.inf,
                                        high=np.inf,
                                        shape=(int(self.env.observation_space.size()),),
                                        dtype=np.float32)
    self.action_space = spaces.Discrete(len(self.all_actions))

  def reset(self):
    """Reset the wrapped environment and return the first observation vector."""
    obs = self.env.reset()
    # Match the dtype declared in observation_space (no copy if already float32).
    return obs.to_vect().astype(np.float32, copy=False)

  def step(self, action):
    """Apply the grid2op action stored at index `action`.

    Returns the usual 4-tuple ``(observation, reward, done, info)`` with the
    observation flattened to a float32 vector and reward/done converted to
    plain Python ``float``/``bool``.
    """
    grid_action = self.all_actions[int(action)]
    obs, reward, done, info = self.env.step(grid_action)
    return (obs.to_vect().astype(np.float32, copy=False),
            float(reward),
            bool(done),
            info)

In [4]:
# Build the wrapped environment. The constructor also reads
# "top1000_actions.npz" via a relative path, so that file must be present in
# the working directory before this cell runs.
env = grid2opGymEnv("l2rpn_neurips_2020_track2_small")

In [4]:
# DQN agent with the built-in MLP policy acting on the wrapped environment.
# buffer_size / learning_starts are forwarded unchanged to stable-baselines3.
model = DQN(
  "MlpPolicy",
  env,
  verbose=1,
  buffer_size=50_000,
  learning_starts=1000,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [5]:
# Train for 10k environment steps; with log_interval=4 a rollout table is
# printed every 4 episodes (see the `episodes` row in the output below).
model.learn(
  total_timesteps=10_000,
  log_interval=4,
)

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 11       |
|    ep_rew_mean      | 8.99e+03 |
|    exploration_rate | 0.958    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 6        |
|    time_elapsed     | 6        |
|    total_timesteps  | 44       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 11.5     |
|    ep_rew_mean      | 8.79e+03 |
|    exploration_rate | 0.913    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 7        |
|    time_elapsed     | 12       |
|    total_timesteps  | 92       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 10.8     |
|    ep_rew_mean      | 8.14e+03 |
|    exploration_rate | 0.877    |
| time/               |          |
|    episodes       

KeyboardInterrupt: 