In [1]:
import grid2op
import lightsim2grid
import warnings
import os
import numpy as np
import sys

from lightsim2grid.LightSimBackend import LightSimBackend
import numpy as np
from agent import Track2PowerNetAgent

from d3qn.adversary import D3QN_Opponent
from l2rpn_baselines.DoubleDuelingDQN.DoubleDuelingDQNConfig import DoubleDuelingDQNConfig as cfg
np.set_printoptions(threshold=sys.maxsize)


# Cap on the number of agent steps played per evaluation episode
# (7 * 288 = 2016); Evaluator.run stops an episode once this is reached.
# NOTE(review): 288 is presumably the number of steps in one day, making this
# a one-week horizon -- confirm against the environment's time resolution.
MAX_TIMESTEP = 7 * 288

# Names of the power lines handed to the opponent as `lines_to_attack`.
# Each name ends with a running index (0 through 185).
# NOTE(review): the two leading numbers are presumably the substations the
# line connects -- confirm against the environment's line names.
LINES = ['0_1_0', '0_2_1', '10_11_2', '69_70_3', '23_71_4', '70_71_5',
       '70_72_6', '69_73_7', '69_74_8', '68_74_9', '73_74_10', '75_76_11',
       '68_76_12', '1_11_13', '74_76_14', '76_77_15', '77_78_16',
       '76_79_17', '76_79_18', '78_79_19', '76_81_20', '81_82_21',
       '82_83_22', '82_84_23', '2_11_24', '83_84_25', '84_85_26',
       '84_87_27', '84_88_28', '87_88_29', '88_89_30', '88_89_31',
       '89_90_32', '88_91_33', '88_91_34', '6_11_35', '90_91_36',
       '91_92_37', '91_93_38', '92_93_39', '93_94_40', '79_95_41',
       '81_95_42', '93_95_43', '79_96_44', '79_97_45', '10_12_46',
       '79_98_47', '91_99_48', '93_99_49', '94_95_50', '95_96_51',
       '97_99_52', '98_99_53', '99_100_54', '91_101_55', '100_101_56',
       '11_13_57', '99_102_58', '99_103_59', '102_103_60', '102_104_61',
       '99_105_62', '103_104_63', '104_105_64', '104_106_65',
       '104_107_66', '105_106_67', '12_14_68', '107_108_69', '102_109_70',
       '108_109_71', '109_110_72', '109_111_73', '16_112_74', '31_112_75',
       '31_113_76', '26_114_77', '113_114_78', '13_14_79', '11_116_80',
       '74_117_81', '75_117_82', '11_15_83', '14_16_84', '3_4_85',
       '15_16_86', '16_17_87', '17_18_88', '18_19_89', '14_18_90',
       '19_20_91', '20_21_92', '21_22_93', '22_23_94', '22_24_95',
       '2_4_96', '24_26_97', '26_27_98', '27_28_99', '7_29_100',
       '25_29_101', '16_30_102', '28_30_103', '22_31_104', '30_31_105',
       '26_31_106', '4_5_107', '14_32_108', '18_33_109', '34_35_110',
       '34_36_111', '32_36_112', '33_35_113', '33_36_114', '36_38_115',
       '36_39_116', '29_37_117', '5_6_118', '38_39_119', '39_40_120',
       '39_41_121', '40_41_122', '42_43_123', '33_42_124', '43_44_125',
       '44_45_126', '45_46_127', '45_47_128', '7_8_129', '46_48_130',
       '41_48_131', '41_48_132', '44_48_133', '47_48_134', '48_49_135',
       '48_50_136', '50_51_137', '51_52_138', '52_53_139', '8_9_140',
       '48_53_141', '48_53_142', '53_54_143', '53_55_144', '54_55_145',
       '55_56_146', '49_56_147', '55_57_148', '50_57_149', '53_58_150',
       '3_10_151', '55_58_152', '55_58_153', '54_58_154', '58_59_155',
       '58_60_156', '59_60_157', '59_61_158', '60_61_159', '62_63_160',
       '37_64_161', '4_10_162', '63_64_163', '48_65_164', '48_65_165',
       '61_65_166', '61_66_167', '65_66_168', '46_68_169', '48_68_170',
       '68_69_171', '23_69_172', '7_4_173', '25_24_174', '80_79_175',
       '86_85_176', '115_67_177', '29_16_178', '37_36_179', '62_58_180',
       '63_60_181', '64_65_182', '64_67_183', '67_68_184', '80_67_185']

  return f(*args, **kwds)


In [5]:
class Evaluator(object):
    """Plays evaluation episodes of an agent, optionally against an opponent.

    Attributes:
        env: environment the episodes are played on; must provide ``reset``,
            ``step``, ``get_obs``, ``fast_forward_chronics`` and a
            ``chronics_handler`` with ``max_timestep()``.
        agent: object whose ``act(obs, None, None)`` returns the next action.
        opponent: opponent given at construction time (``run`` uses the
            opponent passed to it explicitly).
        action_counter_arr: one dict per finished episode, mapping the attack
            identifier returned by ``opponent.attack`` to how many times that
            attack was played during the episode.
    """

    def __init__(self, env, agent, opponent):
        self.env = env
        self.agent = agent
        self.opponent = opponent
        self.action_counter_arr = []  # count the actions taken for each episode

    def run(self, opponent, num_episodes, max_timestep=None):
        """Play ``num_episodes`` episodes and report average length and reward.

        Args:
            opponent: attacker interleaved with the agent, or a falsy value
                to evaluate the agent alone.
            num_episodes: number of episodes to play.
            max_timestep: cap on agent steps per episode; defaults to the
                module-level ``MAX_TIMESTEP`` when omitted.

        Returns:
            Tuple ``(mean_steps, mean_rewards)`` averaged over the episodes.
            Only agent steps and agent rewards are counted.

        Raises:
            AssertionError: if the environment flags an agent action as
                illegal or ambiguous.
        """
        if max_timestep is None:
            max_timestep = MAX_TIMESTEP

        steps_buffer = []
        rewards_buffer = []

        for i_episode in range(num_episodes):
            action_counter = {}
            obs = self.env.reset()
            if opponent:
                opponent.reset(obs)

            # Number of whole 288-step blocks at which an episode of
            # `max_timestep` steps can start and still fit in the chronic.
            max_day = (
                self.env.chronics_handler.max_timestep() - max_timestep) // 288
            if max_day > 0:
                start_timestep = np.random.randint(
                    max_day) * 288 - 1  # start at 00:00
            else:
                # Chronic too short to offset the start: np.random.randint
                # would raise on a non-positive bound, so start at the top.
                start_timestep = -1
            if start_timestep > 0:
                print(f'episode {i_episode} starting at timestep {start_timestep}')
                self.env.fast_forward_chronics(start_timestep)

            obs = self.env.get_obs()
            done = False
            steps = 0
            rewards = 0
            while not done:
                action = self.agent.act(obs, None, None)
                obs, reward, done, info = self.env.step(action)
                assert not info['is_illegal'] and not info['is_ambiguous']
                rewards += reward
                steps += 1

                if done:  # to prevent opponent from taking action on finished episode
                    break

                if opponent:
                    opponent.take_step(obs)
                    if opponent.remaining_time >= 0:
                        # An attack is still running: expose the remaining
                        # time as the attacked line's cooldown in the
                        # observation handed to the agent next.
                        obs.time_before_cooldown_line[opponent.attack_line] = opponent.remaining_time
                        opponent.remaining_time -= 1
                    else:  # attack (only one disconnection at a time)
                        response = opponent.attack(obs)
                        if response is not None:
                            attack, a = response
                            action_counter[a] = action_counter.get(a, 0) + 1
                            # Apply the attack to this evaluator's own
                            # environment, not to a module-level `env`.
                            obs, _, done, info = self.env.step(attack)
                            opponent.tell_attack_continues()

                if steps >= max_timestep:
                    break
            self.action_counter_arr.append(action_counter)
            steps_buffer.append(steps)
            rewards_buffer.append(rewards)
            print(f'Episode {i_episode+1}/{num_episodes} - Reward: {rewards:.2f}, Num Steps: {steps}')

        return np.mean(steps_buffer), np.mean(rewards_buffer)

## Untrained D3QN

In [18]:
# ---- evaluation settings -------------------------------------------------
num_episodes = 20
# Attack frequency: after each attack, next_attack_time is set to
# 1 + randint(attack_period).
attack_period = 50
# Number of steps the attacked line stays in cooldown after an attack.
attack_duration = 20

# ---- environment (warnings silenced only while it is being built) --------
warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)
warnings.filterwarnings("default")

# ---- players --------------------------------------------------------------
agent = Track2PowerNetAgent(env.action_space)
opponent = D3QN_Opponent(
    env.action_space,
    env.observation_space,
    lines_to_attack=LINES,
    attack_period=attack_period,
    attack_duration=attack_duration,
    is_training=False,
)
# No checkpoint is loaded in this cell, so the opponent keeps its initial
# network. NOTE(review): epsilon = 0 presumably disables random exploration
# -- confirm in D3QN_Opponent.
opponent.epsilon = 0

# ---- run and summarise ----------------------------------------------------
evaluator = Evaluator(env, agent, opponent)

mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print(
    'num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
        num_episodes, mean_rewards, mean_steps
    )
)

[32m[05-11 00:10:10 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-11 00:10:11 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/20 - Reward: 1333493.88, Num Steps: 1482
Episode 2/20 - Reward: 1789041.55, Num Steps: 2016
Episode 3/20 - Reward: 2001305.56, Num Steps: 2016
Episode 4/20 - Reward: 155874.02, Num Steps: 145
Episode 5/20 - Reward: 336453.75, Num Steps: 388
Episode 6/20 - Reward: 1713921.74, Num Steps: 2016
Episode 7/20 - Reward: 1904617.76, Num Steps: 2016
Episode 8/20 - Reward: 134445.20, Num Steps: 142
Episode 9/20 - Reward: 2204262.93, Num Steps: 2016
Episode 10/20 - Reward: 321369.25, Num Steps: 432
Episode 11/20 - Reward: 1665835.20, Num Steps: 2016
Episode 12/20 - Reward: 1938558.43, Num Steps: 2016
Episode 13/20 - Reward: 1325488.44, Num Steps: 1349
Episode 14/20 - Reward: 2256559.62, Num Steps: 2016
Episode 15/20 - Reward: 1525722.80, Num S

In [19]:
# Attack tallies of the run above (opponent without a loaded checkpoint):
# one dict per episode, mapping the attack identifier returned by
# opponent.attack() to the number of times it was played.
evaluator.action_counter_arr

[{0: 2, 77: 68},
 {179: 65, 77: 25, 104: 1},
 {77: 91, 179: 1},
 {77: 6, 182: 1},
 {77: 17},
 {77: 92},
 {77: 57, 179: 34},
 {0: 1, 179: 4, 77: 3},
 {77: 87, 179: 4},
 {0: 1, 179: 18, 77: 1, 182: 1},
 {77: 91, 179: 1},
 {179: 91},
 {77: 46, 179: 16},
 {77: 61, 38: 30},
 {77: 92},
 {77: 91},
 {0: 2, 77: 92},
 {182: 8, 77: 54, 179: 30},
 {77: 24, 179: 64, 38: 2, 185: 1},
 {77: 68, 179: 15}]

## Trained D3QN

In [20]:
# ---- evaluation settings -------------------------------------------------
num_episodes = 20
# Attack frequency: after each attack, next_attack_time is set to
# 1 + randint(attack_period).
attack_period = 50
# Number of steps the attacked line stays in cooldown after an attack.
attack_duration = 20

# ---- environment (warnings silenced only while it is being built) --------
warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)
warnings.filterwarnings("default")

# ---- players --------------------------------------------------------------
agent = Track2PowerNetAgent(env.action_space)
opponent = D3QN_Opponent(
    env.action_space,
    env.observation_space,
    lines_to_attack=LINES,
    attack_period=attack_period,
    attack_duration=attack_duration,
    is_training=False,
)
opponent.epsilon = 0
# NOTE(review): the checkpoint directory is named "atk_period_10" while this
# cell evaluates with attack_period = 50 -- confirm the mismatch is intended.
opponent.load('./PARL_opp_D3QN_15000_atk_period_10_atk_duration_20/D3QN_PARL.h5')

# ---- run and summarise ----------------------------------------------------
evaluator = Evaluator(env, agent, opponent)

mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print(
    'num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
        num_episodes, mean_rewards, mean_steps
    )
)

[32m[05-11 00:22:41 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-11 00:22:41 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Successfully loaded network from: ./PARL_opp_D3QN_15000_atk_period_10_atk_duration_20/D3QN_PARL.h5
Episode 1/20 - Reward: 1785404.87, Num Steps: 2016
Episode 2/20 - Reward: 1940701.17, Num Steps: 2016
Episode 3/20 - Reward: 2015617.30, Num Steps: 2016
Episode 4/20 - Reward: 2250971.03, Num Steps: 2016
Episode 5/20 - Reward: 1554194.24, Num Steps: 2016
Episode 6/20 - Reward: 1713969.59, Num Steps: 2016
Episode 7/20 - Reward: 812328.48, Num Steps: 933
Episode 8/20 - Reward: 1994589.92, Num Steps: 2016
Episode 9/20 - Reward: 2130079.11, Num Steps: 2016
Episode 10/20 - Reward: 1576804.77, Num Steps: 2016
Episode 11/20 - Reward: 1488290.49, Num Steps: 1767
Episode 12/20 - Reward: 1938645.12, Num Steps: 2016
Episode 13/20 - Reward: 1243971.03, Num 

In [21]:
# Attack tallies of the run above (checkpoint "D3QN_15000"): one dict per
# episode, mapping the attack identifier returned by opponent.attack() to
# the number of times it was played.
evaluator.action_counter_arr

[{0: 94},
 {0: 92},
 {0: 91},
 {0: 92},
 {0: 92},
 {0: 91},
 {0: 43},
 {0: 91},
 {0: 92},
 {0: 91},
 {0: 82, 16: 1},
 {0: 91},
 {0: 55},
 {0: 92},
 {0: 91},
 {0: 94},
 {0: 92},
 {0: 91},
 {0: 92},
 {0: 92}]

In [27]:
# ---- evaluation settings -------------------------------------------------
num_episodes = 10
# Attack frequency: after each attack, next_attack_time is set to
# 1 + randint(attack_period).
attack_period = 50
# Number of steps the attacked line stays in cooldown after an attack.
attack_duration = 20

# ---- environment (warnings silenced only while it is being built) --------
warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)
warnings.filterwarnings("default")

# ---- players --------------------------------------------------------------
agent = Track2PowerNetAgent(env.action_space)
opponent = D3QN_Opponent(
    env.action_space,
    env.observation_space,
    lines_to_attack=LINES,
    attack_period=attack_period,
    attack_duration=attack_duration,
    is_training=False,
)
opponent.epsilon = 0
# Restore the opponent network from the "D3QN_256" checkpoint.
opponent.load('./PARL_opp_D3QN_256_atk_period_50_atk_duration_20/D3QN_PARL.h5')

# ---- run and summarise ----------------------------------------------------
evaluator = Evaluator(env, agent, opponent)

mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print(
    'num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
        num_episodes, mean_rewards, mean_steps
    )
)

[32m[05-11 02:34:50 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-11 02:34:50 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Successfully loaded network from: ./PARL_opp_D3QN_256_atk_period_50_atk_duration_20/D3QN_PARL.h5
Episode 1/10 - Reward: 551749.31, Num Steps: 656
Episode 2/10 - Reward: 1940690.64, Num Steps: 2016
Episode 3/10 - Reward: 2001284.10, Num Steps: 2016
Episode 4/10 - Reward: 2146718.51, Num Steps: 2016
Episode 5/10 - Reward: 1634867.23, Num Steps: 2016
Episode 6/10 - Reward: 1791973.68, Num Steps: 2016
Episode 7/10 - Reward: 1904813.93, Num Steps: 2016
Episode 8/10 - Reward: 2051568.91, Num Steps: 2016
Episode 9/10 - Reward: 2216903.81, Num Steps: 2016
Episode 10/10 - Reward: 1512693.24, Num Steps: 2016
num_episodes: 10, mean_reward: 1775326.3, mean_steps: 1880.0


In [28]:
# Attack tallies of the run above (checkpoint "D3QN_256"): one dict per
# episode, mapping the attack identifier returned by opponent.attack() to
# the number of times it was played.
evaluator.action_counter_arr

[{0: 32},
 {0: 92},
 {0: 91},
 {0: 93},
 {0: 92},
 {0: 91},
 {0: 92},
 {0: 92},
 {0: 91},
 {0: 92}]

In [10]:
# ---- evaluation settings -------------------------------------------------
num_episodes = 10
# Attack frequency: after each attack, next_attack_time is set to
# 1 + randint(attack_period).
attack_period = 50
# Number of steps the attacked line stays in cooldown after an attack.
attack_duration = 20

# ---- environment (warnings silenced only while it is being built) --------
warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)
warnings.filterwarnings("default")

# ---- players --------------------------------------------------------------
agent = Track2PowerNetAgent(env.action_space)
opponent = D3QN_Opponent(
    env.action_space,
    env.observation_space,
    lines_to_attack=LINES,
    attack_period=attack_period,
    attack_duration=attack_duration,
    is_training=False,
)
opponent.epsilon = 0
# Restore the opponent network from the "shifted_reward_2000" checkpoint.
opponent.load('./PARL_opp_D3QN_shifted_reward_2000_atk_period_50_atk_duration_20/D3QN_PARL.h5')

# ---- run and summarise ----------------------------------------------------
evaluator = Evaluator(env, agent, opponent)

mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print(
    'num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
        num_episodes, mean_rewards, mean_steps
    )
)
# Kept as the last expression so the per-episode attack tallies are displayed.
evaluator.action_counter_arr

[32m[05-16 19:58:36 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-16 19:58:36 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Successfully loaded network from: ./PARL_opp_D3QN_shifted_reward_2000_atk_period_50_atk_duration_20/D3QN_PARL.h5
episode 0 starting at timestep 2879
Episode 1/10 - Reward: 5050.40, Num Steps: 7
episode 1 starting at timestep 1727
Episode 2/10 - Reward: 20932.39, Num Steps: 23
episode 2 starting at timestep 3167
Episode 3/10 - Reward: 22689.47, Num Steps: 23
episode 3 starting at timestep 2591
Episode 4/10 - Reward: 24692.63, Num Steps: 23
episode 4 starting at timestep 863
Episode 5/10 - Reward: 18363.11, Num Steps: 23
episode 5 starting at timestep 863
Episode 6/10 - Reward: 19905.21, Num Steps: 23
episode 6 starting at timestep 5183
Episode 7/10 - Reward: 19835.74, Num Steps: 23
episode 7 starting at timestep 1439
Episode 8/10 - Reward: 222

[{0: 2, 174: 1},
 {174: 1},
 {174: 1},
 {174: 1},
 {174: 1},
 {174: 1},
 {174: 1},
 {174: 1},
 {174: 1},
 {174: 1}]

In [10]:
# Display the model object held by the opponent's policy network.
opponent.policy_net.model

<tensorflow.python.keras.engine.functional.Functional at 0x7f9b390ebb70>

In [None]:
# Display the weights of the opponent's policy model.
# NOTE(review): this presumably prints every weight array in full (the
# notebook also sets np.set_printoptions(threshold=sys.maxsize)), so the
# output may be very large -- consider showing shapes only.
opponent.policy_net.model.get_weights()