# Importing Libraries

In [1]:
import os
import sys
import grid2op
from grid2op.Parameters import Parameters
import numpy as np
from grid2op.dtypes import dt_bool
from grid2op.Agent.greedyAgent import GreedyAgent
from grid2op.Runner import Runner
from grid2op.dtypes import dt_float

# Creating environment and splitting it into training, validation, and testing

In [53]:
# Build the base environment; on first use its data is downloaded (see the log printed below this cell).
env = grid2op.make("rte_case14_redisp")
# Split the environment into train / validation / test environments and keep the three returned names;
# the later cells pass `nm_env_train` to `grid2op.make`.
# NOTE(review): pct_val / pct_test look like percentages (1. would mean 1%) - confirm against the grid2op docs.
# NOTE(review): re-running this cell may fail if the split environments already exist on disk - confirm,
# and guard the call if so, so the notebook survives "Restart & Run All".
nm_env_train, nm_env_val, nm_env_test = env.train_val_split_random(pct_val=1., pct_test=1., add_for_test='test')

We will attempt to download this environment from remote
rte_case14_redisp.tar.bz2: 0.00B [00:00, ?B/s]

downloading the training data, this may take a while.


rte_case14_redisp.tar.bz2: 292MB [00:25, 11.4MB/s]                                                                     


Extract the tar archive in "C:\Users\tejus_\data_grid2op"
	 Successfully updated file "grid.json" for environment "rte_case14_redisp"
You may now use the environment "rte_case14_redisp" with the available data by invoking:
	env = grid2op.make("rte_case14_redisp")


## Loading training environment

In [56]:
# Load the training environment by the name produced by the split, then reset it to get a first observation.
# NOTE(review): `obs` is not used by any of the visible cells below - confirm whether it is still needed.
train_env = grid2op.make(nm_env_train)
obs = train_env.reset()

# Agent Creation

In [60]:
class SwitchingAgentV2(GreedyAgent):
    """Greedy agent that tries powerline switching and line-end bus changes.

    At every step the agent builds a list of candidate actions (see
    ``_get_tested_action``), simulates each of them on the current
    observation and returns the candidate with the highest simulated reward.
    Nothing is learned between steps or episodes.

    Attributes:
        verbose: when True (the default, which keeps the original behaviour)
            the index of the selected candidate and the selected action are
            printed at every step. Set ``SwitchingAgentV2.verbose = False``
            before running to silence this output.
        tested_action: list of candidates built during the last ``act`` call.
        resulting_rewards: simulated reward of each candidate from the last
            ``act`` call (only set when there was more than one candidate).
    """

    # Class-level switch so the output can be silenced even when only the
    # class (and not an instance) is handed over, e.g. ``agentClass=...``.
    verbose = True

    def __init__(self, action_space):
        """Initialise the agent; only the action space is required."""
        GreedyAgent.__init__(self, action_space)

    def act(self, observation, reward, done=False):
        """Return the candidate action with the highest simulated reward.

        Args:
            observation: current observation; must provide ``simulate`` and
                ``line_status``.
            reward: reward of the previous step (not used here).
            done: whether the episode is over (not used here).

        Returns:
            The selected action. If only one candidate exists it is returned
            without being simulated.
        """
        self.tested_action = self._get_tested_action(observation)

        if len(self.tested_action) > 1:
            # ``np.nan`` rather than ``np.NaN``: the capitalised alias was
            # removed in NumPy 2.0 and raises AttributeError there.
            self.resulting_rewards = np.full(
                shape=len(self.tested_action), fill_value=np.nan, dtype=dt_float
            )
            # Simulate every candidate and record the reward it would yield.
            for i, action in enumerate(self.tested_action):
                _, simul_reward, _, _ = observation.simulate(action)
                self.resulting_rewards[i] = simul_reward
            # The candidate with the largest simulated reward wins.
            reward_idx = int(np.argmax(self.resulting_rewards))
            best_action = self.tested_action[reward_idx]
            if self.verbose:
                print(reward_idx)
        else:
            best_action = self.tested_action[0]
        if self.verbose:
            print(best_action)
        return best_action

    def _get_tested_action(self, observation):
        """Build the list of candidate actions for the current observation.

        The list starts with the do-nothing action. Then, for every powerline
        ``i`` (in order), it appends:

        1. a line status switch; when the line is currently disconnected the
           action also sets both ends of the line to bus 1 (an arbitrary
           choice) so the reconnection says where to reconnect;
        2. only if the line is connected: a bus change of its origin end
           (intended to move it to bus 1 if it was on bus 2 and vice versa);
        3. only if the line is connected: a bus change of its extremity end
           (same intent as above).

        Args:
            observation: current observation; ``line_status`` is read from it.

        Returns:
            List of candidate actions, at most ``1 + 3 * n_line`` long.
        """
        n_line = self.action_space.n_line
        res = [self.action_space({})]  # the do-nothing action comes first
        for i in range(n_line):
            line_is_connected = observation.line_status[i]

            # 1. switch the status of powerline i
            switch_mask = np.full(n_line, fill_value=False, dtype=dt_bool)
            switch_mask[i] = True
            action = self.action_space({"change_line_status": switch_mask})
            if not line_is_connected:
                # The switch is a reconnection, so the bus must be given
                # (always bus 1 for this agent).
                action = action.update(
                    {"set_bus": {"lines_or_id": [(i, 1)], "lines_ex_id": [(i, 1)]}})
            res.append(action)

            # 2. and 3. bus changes only make sense on a connected line
            if line_is_connected:
                res.append(self.action_space({"change_bus": {"lines_or_id": i}}))
                res.append(self.action_space({"change_bus": {"lines_ex_id": i}}))
        return res

In [61]:
# NOTE(review): this instance is not used by the visible cells - the Runner below receives the class
# (`agentClass`), not this object - and it is built from `env.action_space` rather than from `train_env`.
# Confirm whether it is still needed.
myagentv2 = SwitchingAgentV2(env.action_space)

# Evaluate the agent class on the training environment.
runner = Runner(**train_env.get_params_for_runner(), agentClass=SwitchingAgentV2)
res = runner.run(nb_episode=5, max_iter=20) # 5 episodes of at most 20 iterations each, so up to 100 steps in total

52
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - Switch status of 1 powerlines ([17])
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
49
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - Switch status of 1 powerlines ([16])
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	

0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
49
This action will:
	 - NOT change anythin

0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	 - NOT change anything

0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
52
This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - Switch status of 1 powerlines ([17])
	 - NOT switch anything in the topology
	 - NOT force any particular bus configuration
0
This action will:
	 - NOT chan

In [62]:
# Print one three-line summary per episode returned by the runner.
print("The results are:")
for chron_name, _, cum_reward, nb_time_step, max_ts in res:
    report_lines = (
        f"\tFor chronics located at {chron_name}",
        f"\t\t - cumulative reward: {cum_reward:.2f}",
        f"\t\t - number of time steps completed: {nb_time_step:.0f} / {max_ts:.0f}",
    )
    print("\n".join(report_lines))

The results are:
	For chronics located at C:\Users\tejus_\data_grid2op\rte_case14_redisp_train\chronics\0
		 - cumulative reward: 1870.59
		 - number of time steps completed: 20 / 20
	For chronics located at C:\Users\tejus_\data_grid2op\rte_case14_redisp_train\chronics\10
		 - cumulative reward: 1898.51
		 - number of time steps completed: 20 / 20
	For chronics located at C:\Users\tejus_\data_grid2op\rte_case14_redisp_train\chronics\100
		 - cumulative reward: 1889.78
		 - number of time steps completed: 20 / 20
	For chronics located at C:\Users\tejus_\data_grid2op\rte_case14_redisp_train\chronics\101
		 - cumulative reward: 1907.64
		 - number of time steps completed: 20 / 20
	For chronics located at C:\Users\tejus_\data_grid2op\rte_case14_redisp_train\chronics\102
		 - cumulative reward: 1901.78
		 - number of time steps completed: 20 / 20


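So, as you can see here, it looks like it is learning a little bit, but I don't know how to train it. (Caveat: the agent above keeps no learned parameters - at each step it simply picks the action with the best simulated reward - and each episode runs on a different chronic, so the differences in cumulative reward may come from the scenarios rather than from learning.)
Next steps would be: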
1. Figure out a way to train it properly
2. Use the trained model on validation and test
3. Improve it using something