In [2]:
import grid2op
import lightsim2grid
import warnings

from lightsim2grid.LightSimBackend import LightSimBackend
import numpy as np
from agent import Track2PowerNetAgent

from simple_opponents.random_opponent import RandomOpponent, WeightedRandomOpponent
from simple_opponents.nothing_opponent import DoNothingOpponent

# Maximum number of agent steps per evaluation episode. Evaluator.run divides
# step counts by 288 to obtain a number of days, so this cap is 7 such days.
MAX_TIMESTEP = 7 * 288

# Line names handed to the opponents as `lines_to_attack`. There are 186
# entries of the form '<a>_<b>_<k>', where k runs from 0 to 185 in list order.
# NOTE(review): <a> and <b> presumably identify the two ends of the line --
# confirm against the environment's own line names.
LINES = ['0_1_0', '0_2_1', '10_11_2', '69_70_3', '23_71_4', '70_71_5',
       '70_72_6', '69_73_7', '69_74_8', '68_74_9', '73_74_10', '75_76_11',
       '68_76_12', '1_11_13', '74_76_14', '76_77_15', '77_78_16',
       '76_79_17', '76_79_18', '78_79_19', '76_81_20', '81_82_21',
       '82_83_22', '82_84_23', '2_11_24', '83_84_25', '84_85_26',
       '84_87_27', '84_88_28', '87_88_29', '88_89_30', '88_89_31',
       '89_90_32', '88_91_33', '88_91_34', '6_11_35', '90_91_36',
       '91_92_37', '91_93_38', '92_93_39', '93_94_40', '79_95_41',
       '81_95_42', '93_95_43', '79_96_44', '79_97_45', '10_12_46',
       '79_98_47', '91_99_48', '93_99_49', '94_95_50', '95_96_51',
       '97_99_52', '98_99_53', '99_100_54', '91_101_55', '100_101_56',
       '11_13_57', '99_102_58', '99_103_59', '102_103_60', '102_104_61',
       '99_105_62', '103_104_63', '104_105_64', '104_106_65',
       '104_107_66', '105_106_67', '12_14_68', '107_108_69', '102_109_70',
       '108_109_71', '109_110_72', '109_111_73', '16_112_74', '31_112_75',
       '31_113_76', '26_114_77', '113_114_78', '13_14_79', '11_116_80',
       '74_117_81', '75_117_82', '11_15_83', '14_16_84', '3_4_85',
       '15_16_86', '16_17_87', '17_18_88', '18_19_89', '14_18_90',
       '19_20_91', '20_21_92', '21_22_93', '22_23_94', '22_24_95',
       '2_4_96', '24_26_97', '26_27_98', '27_28_99', '7_29_100',
       '25_29_101', '16_30_102', '28_30_103', '22_31_104', '30_31_105',
       '26_31_106', '4_5_107', '14_32_108', '18_33_109', '34_35_110',
       '34_36_111', '32_36_112', '33_35_113', '33_36_114', '36_38_115',
       '36_39_116', '29_37_117', '5_6_118', '38_39_119', '39_40_120',
       '39_41_121', '40_41_122', '42_43_123', '33_42_124', '43_44_125',
       '44_45_126', '45_46_127', '45_47_128', '7_8_129', '46_48_130',
       '41_48_131', '41_48_132', '44_48_133', '47_48_134', '48_49_135',
       '48_50_136', '50_51_137', '51_52_138', '52_53_139', '8_9_140',
       '48_53_141', '48_53_142', '53_54_143', '53_55_144', '54_55_145',
       '55_56_146', '49_56_147', '55_57_148', '50_57_149', '53_58_150',
       '3_10_151', '55_58_152', '55_58_153', '54_58_154', '58_59_155',
       '58_60_156', '59_60_157', '59_61_158', '60_61_159', '62_63_160',
       '37_64_161', '4_10_162', '63_64_163', '48_65_164', '48_65_165',
       '61_65_166', '61_66_167', '65_66_168', '46_68_169', '48_68_170',
       '68_69_171', '23_69_172', '7_4_173', '25_24_174', '80_79_175',
       '86_85_176', '115_67_177', '29_16_178', '37_36_179', '62_58_180',
       '63_60_181', '64_65_182', '64_67_183', '67_68_184', '80_67_185']

In [15]:
class Evaluator(object):
    """Play an agent against an optional opponent and collect episode statistics.

    Attributes:
        env: environment providing ``reset``, ``step``, ``get_obs``,
            ``fast_forward_chronics`` and a ``chronics_handler``.
        agent: object whose ``act(obs, reward, done)`` returns the next action.
        opponent: opponent given at construction time. ``run`` does not read
            this attribute; it uses the opponent passed to it explicitly.
        action_counter_arr: one dict per completed episode, mapping the
            identifier returned with each attack to the number of times it
            was played in that episode.
    """

    def __init__(self, env, agent, opponent):
        self.env = env
        self.agent = agent
        self.opponent = opponent
        self.action_counter_arr = []  # count the actions taken for each episode

    def run(self, opponent, num_episodes):
        """Run ``num_episodes`` episodes and return the mean steps and mean reward.

        Each episode starts at a randomly chosen day boundary of the chronics
        and lasts until the environment reports ``done`` or ``MAX_TIMESTEP``
        agent steps have been played. After every agent step the opponent (if
        any) either lets its current attack run on or plays a new attack.

        Args:
            opponent: opponent to play against; a falsy value disables attacks.
            num_episodes: number of episodes to run.

        Returns:
            Tuple ``(mean_steps, mean_rewards)`` over the episodes of this call.

        Raises:
            AssertionError: if the environment flags an agent action as
                illegal or ambiguous.
        """
        steps_buffer = []
        rewards_buffer = []

        for i_episode in range(num_episodes):
            action_counter = {}
            self.env.reset()
            if opponent:
                opponent.reset()

            # Number of whole days that can be skipped while still leaving
            # MAX_TIMESTEP steps in the chronics.
            max_day = (
                self.env.chronics_handler.max_timestep() - MAX_TIMESTEP) // 288
            if max_day > 0:
                start_timestep = np.random.randint(
                    max_day) * 288 - 1  # start at 00:00
            else:
                # np.random.randint raises ValueError for a non-positive bound;
                # chronics too short to skip a day simply start at the beginning.
                start_timestep = 0
            if start_timestep > 0:
                self.env.fast_forward_chronics(start_timestep)

            obs = self.env.get_obs()
            done = False
            steps = 0
            rewards = 0
            while not done:
                # The agent is not given the previous reward or done flag.
                action = self.agent.act(obs, None, None)
                obs, reward, done, info = self.env.step(action)
                assert not info['is_illegal'] and not info['is_ambiguous']
                rewards += reward
                steps += 1

                if done:  # to prevent opponent from taking action on finished episode
                    break

                if opponent:
                    opponent.take_step(obs)
                    if opponent.remaining_time >= 0:
                        # Attack still running: expose the remaining duration
                        # to the agent as the cooldown of the attacked line.
                        obs.time_before_cooldown_line[opponent.attack_line] = opponent.remaining_time
                        opponent.remaining_time -= 1
                    else:  # attack (only one disconnection at a time)
                        response = opponent.attack(obs)
                        if response is not None:
                            attack, a = response
                            action_counter[a] = action_counter.get(a, 0) + 1
                            # Step the evaluator's own environment, not a
                            # module-level `env`. The reward of this step is
                            # not added to the episode total.
                            obs, opp_reward, done, info = self.env.step(attack)
                            opponent.tell_attack_continues()

                if steps >= MAX_TIMESTEP:
                    break
            self.action_counter_arr.append(action_counter)
            steps_buffer.append(steps)
            rewards_buffer.append(rewards)
            print(f'Episode {i_episode+1}/{num_episodes} - Reward: {rewards:.2f}, Num Steps: {steps}')

        return np.mean(steps_buffer), np.mean(rewards_buffer)

## DoNothing Opponent

In [9]:
# Baseline run: evaluate the agent against an opponent built without any
# attack settings.
num_episodes = 20
env_dir=None  # not read anywhere in this cell
# how frequent the attack is.
# after each attack, next_attack_time is set to 1 + randint(attack_period)
# (attack_period and attack_duration are not passed to DoNothingOpponent below)
attack_period = 20
# how long the line is cooled down for after attack
attack_duration = 10

# Silence warnings only while the backend and the environment are created.
warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)

# Re-enable warnings. This installs the "default" action for all warnings; it
# does not restore whatever filters were active before the "ignore" above.
warnings.filterwarnings("default")
agent = Track2PowerNetAgent(env.action_space)
opponent = DoNothingOpponent(env.observation_space, env.action_space)
evaluator = Evaluator(env, agent, opponent)

# The same opponent object is handed to both the constructor and run().
mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print('num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
    num_episodes, mean_rewards, mean_steps))

[32m[05-11 00:09:50 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-11 00:09:50 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/20 - Reward: 1784931.03, Num Steps: 2016
Episode 2/20 - Reward: 1923753.72, Num Steps: 2016
Episode 3/20 - Reward: 2094771.16, Num Steps: 2016
Episode 4/20 - Reward: 2247444.26, Num Steps: 2016
Episode 5/20 - Reward: 1584460.73, Num Steps: 2016
Episode 6/20 - Reward: 1763334.31, Num Steps: 2016
Episode 7/20 - Reward: 1840718.42, Num Steps: 2016
Episode 8/20 - Reward: 126961.36, Num Steps: 112
Episode 9/20 - Reward: 2236050.47, Num Steps: 2016
Episode 10/20 - Reward: 1573227.66, Num Steps: 2016
Episode 11/20 - Reward: 1752298.93, Num Steps: 2016
Episode 12/20 - Reward: 1803786.08, Num Steps: 2016
Episode 13/20 - Reward: 1980715.89, Num Steps: 2016
Episode 14/20 - Reward: 2229989.31, Num Steps: 2016
Episode 15/20 - Reward: 1591246.71,

## Random Opponent

### Attack duration 10 Attack Period 20

In [16]:
# Evaluate the agent against RandomOpponent with attack_period=20 and
# attack_duration=10, attacking any of the lines listed in LINES.
num_episodes = 20
env_dir=None  # not read anywhere in this cell
# how frequent the attack is.
# after each attack, next_attack_time is set to 1 + randint(attack_period)
attack_period = 20
# how long the line is cooled down for after attack
attack_duration = 10

# Silence warnings only while the backend and the environment are created.
warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)

# Re-enable warnings. This installs the "default" action for all warnings; it
# does not restore whatever filters were active before the "ignore" above.
warnings.filterwarnings("default")
agent = Track2PowerNetAgent(env.action_space)
opponent = RandomOpponent(env.observation_space, env.action_space,
                          lines_to_attack=LINES, attack_period=attack_period,
                          attack_duration=attack_duration)
# A fresh Evaluator, so action_counter_arr only holds episodes from this cell.
evaluator = Evaluator(env, agent, opponent)


mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print('num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
    num_episodes, mean_rewards, mean_steps))

[32m[05-11 00:21:33 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-11 00:21:33 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/20 - Reward: 26890.87, Num Steps: 29
Episode 2/20 - Reward: 140632.68, Num Steps: 151
Episode 3/20 - Reward: 284931.33, Num Steps: 287
Episode 4/20 - Reward: 10922.02, Num Steps: 12
Episode 5/20 - Reward: 40884.80, Num Steps: 49
Episode 6/20 - Reward: 188252.51, Num Steps: 223
Episode 7/20 - Reward: 155524.95, Num Steps: 175
Episode 8/20 - Reward: 92000.06, Num Steps: 75
Episode 9/20 - Reward: 55505.84, Num Steps: 46
Episode 10/20 - Reward: 2390.87, Num Steps: 3
Episode 11/20 - Reward: 485423.69, Num Steps: 539
Episode 12/20 - Reward: 37539.17, Num Steps: 41
Episode 13/20 - Reward: 165887.59, Num Steps: 130
Episode 14/20 - Reward: 52080.29, Num Steps: 47
Episode 15/20 - Reward: 161303.69, Num Steps: 201
Episode 16/20 - Reward: 10848

In [17]:
# One dict per episode run by this evaluator: keys are the identifiers returned
# alongside each attack by opponent.attack(), values are how often each occurred.
evaluator.action_counter_arr

[{147: 1, 176: 1},
 {171: 1, 74: 1, 123: 1, 115: 1, 148: 1, 156: 1, 64: 1, 173: 1},
 {13: 1,
  39: 1,
  183: 1,
  70: 1,
  67: 1,
  10: 1,
  105: 1,
  78: 1,
  155: 1,
  24: 1,
  122: 1,
  169: 1,
  35: 1,
  51: 1,
  114: 1},
 {114: 1},
 {171: 1, 27: 1, 117: 1},
 {175: 1, 108: 1, 124: 1, 2: 1, 180: 1, 169: 1, 130: 1, 5: 1, 85: 1, 173: 1},
 {121: 1, 7: 1, 32: 1, 119: 1, 161: 1, 112: 1, 153: 1, 60: 1, 72: 1},
 {125: 1, 71: 1, 69: 1, 80: 1},
 {105: 1, 48: 1, 26: 1},
 {80: 1},
 {164: 1,
  25: 1,
  184: 2,
  144: 1,
  165: 1,
  153: 1,
  172: 1,
  50: 1,
  22: 1,
  137: 1,
  76: 1,
  68: 1,
  95: 1,
  54: 1,
  32: 1,
  47: 1,
  134: 1,
  70: 1,
  145: 1,
  52: 1,
  158: 1,
  183: 1,
  170: 1,
  15: 1},
 {53: 1, 26: 1},
 {86: 1, 50: 1, 171: 1, 157: 1, 117: 1, 148: 1, 111: 1},
 {30: 1, 80: 1},
 {8: 1, 91: 2, 90: 1, 100: 1, 126: 1, 70: 1, 144: 1, 73: 1},
 {44: 1, 118: 1, 17: 1, 35: 1, 122: 1, 174: 1},
 {175: 1,
  88: 1,
  2: 1,
  102: 1,
  13: 2,
  130: 1,
  89: 1,
  64: 1,
  100: 1,
  85: 1,


### Attack duration 20 Attack Period 50

In [18]:
# Evaluate the agent against RandomOpponent with attack_period=50 and
# attack_duration=20, attacking any of the lines listed in LINES.
num_episodes = 20
env_dir=None  # not read anywhere in this cell
# how frequent the attack is.
# after each attack, next_attack_time is set to 1 + randint(attack_period)
attack_period = 50
# how long the line is cooled down for after attack
attack_duration = 20

# Silence warnings only while the backend and the environment are created.
warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)

# Re-enable warnings. This installs the "default" action for all warnings; it
# does not restore whatever filters were active before the "ignore" above.
warnings.filterwarnings("default")
agent = Track2PowerNetAgent(env.action_space)
opponent = RandomOpponent(env.observation_space, env.action_space,
                          lines_to_attack=LINES, attack_period=attack_period,
                          attack_duration=attack_duration)
# A fresh Evaluator, so action_counter_arr only holds episodes from this cell.
evaluator = Evaluator(env, agent, opponent)

mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print('num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
    num_episodes, mean_rewards, mean_steps))

[32m[05-11 00:23:41 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-11 00:23:41 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/20 - Reward: 62698.31, Num Steps: 65
Episode 2/20 - Reward: 216079.02, Num Steps: 242
Episode 3/20 - Reward: 277635.56, Num Steps: 288
Episode 4/20 - Reward: 384501.24, Num Steps: 323
Episode 5/20 - Reward: 845557.14, Num Steps: 1086
Episode 6/20 - Reward: 213064.99, Num Steps: 239
Episode 7/20 - Reward: 218694.56, Num Steps: 187
Episode 8/20 - Reward: 181349.08, Num Steps: 189
Episode 9/20 - Reward: 583771.16, Num Steps: 566
Episode 10/20 - Reward: 39190.21, Num Steps: 46
Episode 11/20 - Reward: 242028.34, Num Steps: 289
Episode 12/20 - Reward: 400995.56, Num Steps: 432
Episode 13/20 - Reward: 412517.49, Num Steps: 409
Episode 14/20 - Reward: 748098.96, Num Steps: 627
Episode 15/20 - Reward: 9047.41, Num Steps: 11
Episode 16/20 - R

In [19]:
# One dict per episode run by this evaluator: keys are the identifiers returned
# alongside each attack by opponent.attack(), values are how often each occurred.
evaluator.action_counter_arr

[{118: 1, 177: 1},
 {62: 1, 83: 1, 49: 1, 86: 1, 160: 1},
 {53: 1, 166: 1, 48: 1, 14: 1, 74: 1, 100: 1},
 {12: 1, 148: 1, 122: 1, 169: 1, 147: 1, 178: 1, 6: 1},
 {23: 2,
  146: 1,
  157: 1,
  89: 1,
  116: 1,
  126: 1,
  2: 1,
  178: 1,
  22: 1,
  124: 1,
  133: 1,
  62: 1,
  17: 2,
  50: 1,
  170: 1,
  11: 1,
  181: 1,
  153: 2,
  154: 1,
  148: 1,
  158: 1,
  163: 1},
 {56: 1, 150: 1, 145: 1, 75: 1, 173: 1},
 {91: 1, 149: 1, 1: 1, 163: 1, 177: 1},
 {178: 1, 155: 1, 156: 1, 38: 1, 129: 1},
 {87: 1,
  20: 1,
  180: 1,
  5: 1,
  43: 1,
  58: 1,
  125: 1,
  147: 1,
  120: 1,
  139: 1,
  27: 1,
  26: 1},
 {85: 1, 26: 1},
 {67: 1, 134: 1, 105: 1, 50: 1, 176: 1},
 {30: 1, 25: 1, 3: 1, 89: 1, 118: 1, 2: 1, 62: 1, 102: 1, 174: 1},
 {58: 1, 79: 1, 57: 1, 35: 1, 116: 1, 181: 1, 14: 1, 159: 1, 158: 1, 176: 1},
 {112: 1,
  156: 1,
  57: 1,
  99: 1,
  144: 1,
  126: 1,
  19: 1,
  165: 1,
  120: 1,
  104: 1,
  74: 1,
  129: 1},
 {26: 1},
 {163: 1},
 {141: 1, 88: 1, 171: 1, 127: 1, 66: 1, 80: 1},
 {

## Weighted Random Opponent

In [20]:
# Evaluate the agent against WeightedRandomOpponent with attack_period=50 and
# attack_duration=20, attacking any of the lines listed in LINES.
num_episodes = 20
env_dir=None  # not read anywhere in this cell
# how frequent the attack is.
# after each attack, next_attack_time is set to 1 + randint(attack_period)
attack_period = 50
# how long the line is cooled down for after attack
attack_duration = 20

# Silence warnings only while the backend and the environment are created.
warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)

# Re-enable warnings. This installs the "default" action for all warnings; it
# does not restore whatever filters were active before the "ignore" above.
warnings.filterwarnings("default")
agent = Track2PowerNetAgent(env.action_space)
opponent = WeightedRandomOpponent(env.observation_space, env.action_space,
                          lines_to_attack=LINES, attack_period=attack_period,
                          attack_duration=attack_duration)
# A fresh Evaluator, so action_counter_arr only holds episodes from this cell.
evaluator = Evaluator(env, agent, opponent)

mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print('num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
    num_episodes, mean_rewards, mean_steps))

[32m[05-11 00:27:33 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-11 00:27:34 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/20 - Reward: 356978.85, Num Steps: 414
Episode 2/20 - Reward: 68087.71, Num Steps: 65
Episode 3/20 - Reward: 439429.19, Num Steps: 474
Episode 4/20 - Reward: 1027780.71, Num Steps: 954
Episode 5/20 - Reward: 155269.78, Num Steps: 183
Episode 6/20 - Reward: 220051.31, Num Steps: 266
Episode 7/20 - Reward: 1333186.67, Num Steps: 1416
Episode 8/20 - Reward: 141368.07, Num Steps: 136
Episode 9/20 - Reward: 199969.41, Num Steps: 160
Episode 10/20 - Reward: 571683.63, Num Steps: 711
Episode 11/20 - Reward: 904349.71, Num Steps: 1121
Episode 12/20 - Reward: 197292.64, Num Steps: 211
Episode 13/20 - Reward: 104276.51, Num Steps: 104
Episode 14/20 - Reward: 17346.85, Num Steps: 16
Episode 15/20 - Reward: 77427.90, Num Steps: 79
Episode 16/20

In [21]:
# One dict per episode run by this evaluator: keys are the identifiers returned
# alongside each attack by opponent.attack(), values are how often each occurred.
evaluator.action_counter_arr

[{8: 1, 66: 1, 182: 1, 23: 1, 14: 1, 56: 1, 136: 1, 127: 1},
 {25: 1, 26: 1},
 {166: 1, 119: 1, 180: 1, 40: 1, 182: 1, 16: 1, 27: 1, 86: 1, 77: 1, 72: 1},
 {109: 1,
  98: 1,
  62: 2,
  13: 2,
  31: 1,
  22: 2,
  94: 2,
  49: 1,
  125: 1,
  11: 1,
  163: 1,
  41: 1,
  42: 1,
  131: 1,
  7: 1,
  35: 1,
  99: 1,
  111: 1},
 {50: 1, 66: 1, 91: 1, 101: 1},
 {180: 1, 102: 1, 133: 1, 98: 1, 103: 1, 14: 1, 114: 1},
 {98: 1,
  90: 1,
  103: 1,
  81: 1,
  107: 1,
  152: 2,
  64: 1,
  82: 2,
  178: 1,
  163: 1,
  24: 1,
  52: 1,
  108: 1,
  166: 1,
  124: 1,
  47: 1,
  61: 1,
  138: 1,
  154: 1,
  168: 1,
  79: 1,
  112: 1,
  134: 1,
  132: 1,
  37: 1,
  75: 1,
  96: 1,
  125: 1,
  139: 1,
  40: 1,
  73: 1},
 {107: 1, 61: 1, 118: 1, 185: 1},
 {154: 1, 43: 1, 119: 1, 111: 1},
 {71: 2,
  33: 2,
  152: 1,
  137: 1,
  169: 1,
  124: 1,
  18: 1,
  145: 1,
  150: 1,
  130: 1,
  140: 1},
 {39: 1,
  115: 1,
  86: 1,
  109: 1,
  75: 1,
  169: 1,
  91: 1,
  31: 1,
  48: 1,
  180: 1,
  153: 1,
  54: 1,
  79