In [20]:
import grid2op
import random
import copy
import gym
import numpy as np
import torch
import torch.nn.functional as F

import matplotlib.pyplot as plt

from typing import Callable
from torch import nn as nn
from torch.optim import AdamW
from tqdm import tqdm

from grid2op.Agent import BaseAgent
from grid2op.gym_compat import GymEnv, BoxGymObsSpace, DiscreteActSpace
from gym import Env
from gym.utils.env_checker import check_env
from grid2op.PlotGrid import PlotMatplot

In [2]:
class PreprocessEnv(gym.Wrapper):
    
    def __init__(self, env):
        gym.Wrapper.__init__(self, env)
    
    def reset(self):
        obs = self.env.reset()
        print(f'type={ type(obs) }')
        return torch.from_numpy(obs).unsqueeze(dim=0).float()
    
    def step(self, action):
        action = action.item()
        next_state, reward, done, info = self.env.step(action)
        next_state = torch.from_numpy(next_state).unsqueeze(dim=0).float()
        reward = torch.tensor(reward).view(1, -1).float()
        done = torch.tensor(done).view(1, -1)
        return next_state, reward, done, info

In [83]:
class ReplayMemory:

    def __init__(self, capacity=1000000):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def insert(self, transition):
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = transition
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        assert self.can_sample(batch_size)

        batch = random.sample(self.memory, batch_size)
        batch = zip(*batch)
        return [torch.cat(items) for items in batch]

    def can_sample(self, batch_size):
        print(f'Memory len is:{len(self.memory)} and condition is: {batch_size * 10}')
        return len(self.memory) >= batch_size * 10

    def __len__(self):
        return len(self.memory)

In [102]:
class DeepSarsaAgent(BaseAgent):
    
    def __init__(self, episodes, env_name=None):
        if env_name is None:
            raise RuntimeError("Environment name must be passed")
        env = grid2op.make(env_name)
        self.episodes = episodes
        self.gym_env = GymEnv(env)
        self.gym_env.observation_space = BoxGymObsSpace(env.observation_space, attr_to_keep=["gen_p", "load_p", "topo_vect", "rho"])
        self.gym_env.action_space = DiscreteActSpace(env.action_space, attr_to_keep=["set_bus" , "change_bus", "change_line_status", "set_line_status", "set_line_status_simple"])
        self.state_dims = self.gym_env.observation_space.shape[0]
        self.num_actions = self.gym_env.action_space.n
        
        self.prepo_gym_env = PreprocessEnv(self.gym_env)
    
    def q_network_def(self):
        q_network = nn.Sequential(nn.Linear(self.state_dims, 300),
                                  nn.ReLU(),
                                  nn.Linear(300, 250),
                                  nn.ReLU(),
                                  nn.Linear(250, self.num_actions))
        return q_network
    
    def target_network(self):
        q_network = self.q_network_def()
        target_q_network = copy.deepcopy(q_network).eval()
        return target_q_network
    
    def policy(self, state, epsilon=0.05):
        self.epsilon = 0.05
        q_network = self.q_network_def()
        if torch.rand(1) < epsilon:
            return torch.randint(self.num_actions, (1, 1))
        else:
            av = q_network(state).detach()
            return torch.argmax(av, dim=-1, keepdim=True)
    
    def train_network(self, policy, alpha=0.001, batch_size=32, gamma=0.99, epsilon=0.):
        self.batch_size = 5
        self.gamma = 0.99
        q_network = self.q_network_def()
        target_q_network = self.target_network()
        optim = AdamW(q_network.parameters(), lr=0.001) #optimiser to optimise weight calculation of neural networks
        memory = ReplayMemory() #Initialising memory to store State, Action, Reward, and Next State
        stats = {'MSE Loss': [], 'Returns': []} #Dict to store statistics
        self.trained = False
    
        for self.episode in tqdm(range(1, self.episodes + 1)):
            state = self.prepo_gym_env.reset() #getting initial state
            done = False
            ep_return = 0
            while not done:
                action = self.policy(state, epsilon) #Getting first action greedily with randomisation factor Epsilon
                print(f'The action is: {action}')
                next_state, reward, done, _ = self.prepo_gym_env.step(action) #taking selected action on environment
                memory.insert([state, action, reward, done, next_state]) #Storing the results to memory
                if memory.can_sample(self.batch_size): #samples will be created only if memory pool is 10 times of batch size
                    state_b, action_b, reward_b, done_b, next_state_b = memory.sample(self.batch_size) #creating batches to train neural network
                    qsa_b = q_network(state_b).gather(1, action_b) #providing the state to neural network and comparing the 
                                                                #actions with actions stored in memory and gather the experiences
                    next_action_b = self.policy(next_state_b) #using greedy epsilon policy to greedily get next actions
                    next_qsa_b = target_q_network(next_state_b).gather(1, next_action_b) #provide next state and next action to a target neural network
                                                                                        #and gather its experiences
                    target_b = reward_b + ~done_b * gamma * next_qsa_b #discount the experiences of target network
                    loss = F.mse_loss(qsa_b, target_b) #find a Mean square error loss
                    q_network.zero_grad() #reset the gradients of the network
                    loss.backward() #calculate gradients using backward propogation
                    optim.step() # Iterate over all parameters (tensors) that are supposed 
                                # to be updated and use internally stored grad to update their values
                    loss.item() # get the loss
                    stats['MSE Loss'].append(loss.item())
                    grid2op_act = self.gym_env.action_space.from_gym(action)

                state = next_state
                ep_return += reward.item()

            stats['Returns'].append(ep_return)

            if self.episode % 10 == 0:
                target_q_network.load_state_dict(q_network.state_dict()) #After every 10 episodes load state of original network to
                                                                        # target network
        self.trained = True
        return stats
    
    def act(self):
        if self.trained:
            

In [103]:
deep_sarsa_agent = DeepSarsaAgent(episodes=20, env_name = "rte_case14_realistic")

In [104]:
max_iter = 100 #customize
from grid2op.Runner import Runner
import os
# from grid2op.Agent import DoNothingAgent
from grid2op.Reward import L2RPNReward
from grid2op.Chronics import GridStateFromFileWithForecasts

path_saved_data = './Res'
if not os.path.exists(path_saved_data):
    os.mkdir(path_saved_data)

env_name = "rte_case14_realistic"
env = grid2op.make(env_name)

runner = Runner(**env.get_params_for_runner(),
                agentInstance=deep_sarsa_agent, agentClass=None)
res = runner.run(nb_episode=20, max_iter=max_iter, path_save=path_saved_data)
print("The results for the custom agent are:")
for _, chron_name, cum_reward, nb_time_step, max_ts in res:
    msg_tmp = "\tFor chronics with id {}\n".format(chron_name)
    msg_tmp += "\t\t - cumulative reward: {:.6f}\n".format(cum_reward)
    msg_tmp += "\t\t - number of time steps completed: {:.0f} / {:.0f}".format(nb_time_step, max_ts)
    print(msg_tmp)

  5%|████▏                                                                              | 1/20 [00:00<00:06,  2.95it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[200]])
Memory len is:1 and condition is: 50
The action is: tensor([[177]])
Memory len is:2 and condition is: 50
The action is: tensor([[143]])
Memory len is:3 and condition is: 50
The action is: tensor([[390]])
Memory len is:4 and condition is: 50
The action is: tensor([[159]])
Memory len is:5 and condition is: 50
type=<class 'numpy.ndarray'>
The action is: tensor([[327]])
Memory len is:6 and condition is: 50
The action is: tensor([[226]])
Memory len is:7 and condition is: 50
The action is: tensor([[307]])
Memory len is:8 and condition is: 50
The action is: tensor([[342]])
Memory len is:9 and condition is: 50
The action is: tensor([[162]])
Memory len is:10 and condition is: 50
The action is: tensor([[350]])
Memory len is:11 and condition is: 50
The action is: tensor([[192]])
Memory len is:12 and condition is: 50
The action is: tensor([[51]])


 10%|████████▎                                                                          | 2/20 [00:00<00:06,  2.71it/s]

Memory len is:13 and condition is: 50
type=<class 'numpy.ndarray'>
The action is: tensor([[255]])


 15%|████████████▍                                                                      | 3/20 [00:01<00:05,  3.08it/s]

Memory len is:14 and condition is: 50
The action is: tensor([[116]])
Memory len is:15 and condition is: 50
The action is: tensor([[107]])
Memory len is:16 and condition is: 50


 20%|████████████████▌                                                                  | 4/20 [00:01<00:05,  2.92it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[378]])
Memory len is:17 and condition is: 50
The action is: tensor([[377]])
Memory len is:18 and condition is: 50
The action is: tensor([[417]])
Memory len is:19 and condition is: 50
The action is: tensor([[197]])
Memory len is:20 and condition is: 50
The action is: tensor([[274]])
Memory len is:21 and condition is: 50
The action is: tensor([[225]])
Memory len is:22 and condition is: 50
The action is: tensor([[320]])
Memory len is:23 and condition is: 50


 25%|████████████████████▊                                                              | 5/20 [00:01<00:04,  3.14it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[65]])
Memory len is:24 and condition is: 50
The action is: tensor([[108]])
Memory len is:25 and condition is: 50
The action is: tensor([[69]])
Memory len is:26 and condition is: 50
The action is: tensor([[192]])
Memory len is:27 and condition is: 50


 30%|████████████████████████▉                                                          | 6/20 [00:01<00:04,  3.37it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[194]])
Memory len is:28 and condition is: 50
The action is: tensor([[122]])
Memory len is:29 and condition is: 50


 35%|█████████████████████████████                                                      | 7/20 [00:02<00:03,  3.68it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[1]])
Memory len is:30 and condition is: 50


 40%|█████████████████████████████████▏                                                 | 8/20 [00:02<00:03,  3.75it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[413]])
Memory len is:31 and condition is: 50
The action is: tensor([[109]])
Memory len is:32 and condition is: 50


 45%|█████████████████████████████████████▎                                             | 9/20 [00:02<00:03,  3.47it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[192]])
Memory len is:33 and condition is: 50
The action is: tensor([[363]])
Memory len is:34 and condition is: 50
The action is: tensor([[365]])
Memory len is:35 and condition is: 50
The action is: tensor([[115]])
Memory len is:36 and condition is: 50
The action is: tensor([[337]])
Memory len is:37 and condition is: 50


 50%|█████████████████████████████████████████                                         | 10/20 [00:03<00:03,  3.23it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[235]])
Memory len is:38 and condition is: 50
The action is: tensor([[260]])
Memory len is:39 and condition is: 50
The action is: tensor([[156]])
Memory len is:40 and condition is: 50
The action is: tensor([[135]])
Memory len is:41 and condition is: 50
The action is: tensor([[454]])
Memory len is:42 and condition is: 50


 55%|█████████████████████████████████████████████                                     | 11/20 [00:03<00:02,  3.49it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[154]])
Memory len is:43 and condition is: 50


 60%|█████████████████████████████████████████████████▏                                | 12/20 [00:03<00:02,  3.75it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[286]])
Memory len is:44 and condition is: 50
type=<class 'numpy.ndarray'>
The action is: tensor([[223]])
Memory len is:45 and condition is: 50
The action is: tensor([[405]])
Memory len is:46 and condition is: 50
The action is: tensor([[226]])
Memory len is:47 and condition is: 50
The action is: tensor([[352]])
Memory len is:48 and condition is: 50
The action is: tensor([[334]])
Memory len is:49 and condition is: 50
The action is: tensor([[71]])
Memory len is:50 and condition is: 50
Memory len is:50 and condition is: 50


 65%|█████████████████████████████████████████████████████▎                            | 13/20 [00:03<00:02,  3.33it/s]

its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
5162.18359375
type=<class 'numpy.ndarray'>


 70%|█████████████████████████████████████████████████████████▍                        | 14/20 [00:04<00:01,  3.62it/s]

The action is: tensor([[120]])
Memory len is:51 and condition is: 50
Memory len is:51 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
3612.47412109375


 75%|█████████████████████████████████████████████████████████████▌                    | 15/20 [00:04<00:01,  3.54it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[296]])
Memory len is:52 and condition is: 50
Memory len is:52 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
1795.0924072265625
The action is: tensor([[174]])
Memory len is:53 and condition is: 50
Memory len is:53 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
3004.345703125


 80%|█████████████████████████████████████████████████████████████████▌                | 16/20 [00:04<00:01,  3.30it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[213]])
Memory len is:54 and condition is: 50
Memory len is:54 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
3463.921875
The action is: tensor([[68]])
Memory len is:55 and condition is: 50
Memory len is:55 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
4651.3408203125
The action is: tensor([[180]])
Memory len is:56 and condition is: 50
Memory len is:56 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
4673.5087890625
The action is: tensor([[5]])
Memory len is:57 and condition is: 

 85%|█████████████████████████████████████████████████████████████████████▋            | 17/20 [00:05<00:00,  3.09it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[43]])
Memory len is:63 and condition is: 50
Memory len is:63 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
2740.381591796875
The action is: tensor([[199]])
Memory len is:64 and condition is: 50
Memory len is:64 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
14.391748428344727
The action is: tensor([[130]])
Memory len is:65 and condition is: 50
Memory len is:65 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
1473.408447265625
The action is: tensor([[271]])
Memory len is:66 and c

 90%|█████████████████████████████████████████████████████████████████████████▊        | 18/20 [00:05<00:00,  2.89it/s]

its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
5641.1455078125
type=<class 'numpy.ndarray'>


 95%|█████████████████████████████████████████████████████████████████████████████▉    | 19/20 [00:05<00:00,  3.11it/s]

The action is: tensor([[63]])
Memory len is:67 and condition is: 50
Memory len is:67 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
4780.2734375
The action is: tensor([[58]])
Memory len is:68 and condition is: 50
Memory len is:68 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
4065.79248046875


100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.31it/s]

type=<class 'numpy.ndarray'>
The action is: tensor([[6]])
Memory len is:69 and condition is: 50
Memory len is:69 and condition is: 50
its coming here 1
its coming here 2
its coming here 3
its coming here 4
its coming here 5
its coming here 6
its coming here 7
its coming here 8
its coming here 9
its coming here 10
2948.8623046875





AttributeError: 'dict' object has no attribute '_redispatch'