In [2]:
import grid2op
import lightsim2grid
import warnings

from lightsim2grid.LightSimBackend import LightSimBackend
import numpy as np
from agent import Track2PowerNetAgent

from simple_opponents.random_opponent import RandomOpponent, WeightedRandomOpponent
from simple_opponents.nothing_opponent import DoNothingOpponent

MAX_TIMESTEP = 7 * 288

LINES = ['0_1_0', '0_2_1', '10_11_2', '69_70_3', '23_71_4', '70_71_5',
       '70_72_6', '69_73_7', '69_74_8', '68_74_9', '73_74_10', '75_76_11',
       '68_76_12', '1_11_13', '74_76_14', '76_77_15', '77_78_16',
       '76_79_17', '76_79_18', '78_79_19', '76_81_20', '81_82_21',
       '82_83_22', '82_84_23', '2_11_24', '83_84_25', '84_85_26',
       '84_87_27', '84_88_28', '87_88_29', '88_89_30', '88_89_31',
       '89_90_32', '88_91_33', '88_91_34', '6_11_35', '90_91_36',
       '91_92_37', '91_93_38', '92_93_39', '93_94_40', '79_95_41',
       '81_95_42', '93_95_43', '79_96_44', '79_97_45', '10_12_46',
       '79_98_47', '91_99_48', '93_99_49', '94_95_50', '95_96_51',
       '97_99_52', '98_99_53', '99_100_54', '91_101_55', '100_101_56',
       '11_13_57', '99_102_58', '99_103_59', '102_103_60', '102_104_61',
       '99_105_62', '103_104_63', '104_105_64', '104_106_65',
       '104_107_66', '105_106_67', '12_14_68', '107_108_69', '102_109_70',
       '108_109_71', '109_110_72', '109_111_73', '16_112_74', '31_112_75',
       '31_113_76', '26_114_77', '113_114_78', '13_14_79', '11_116_80',
       '74_117_81', '75_117_82', '11_15_83', '14_16_84', '3_4_85',
       '15_16_86', '16_17_87', '17_18_88', '18_19_89', '14_18_90',
       '19_20_91', '20_21_92', '21_22_93', '22_23_94', '22_24_95',
       '2_4_96', '24_26_97', '26_27_98', '27_28_99', '7_29_100',
       '25_29_101', '16_30_102', '28_30_103', '22_31_104', '30_31_105',
       '26_31_106', '4_5_107', '14_32_108', '18_33_109', '34_35_110',
       '34_36_111', '32_36_112', '33_35_113', '33_36_114', '36_38_115',
       '36_39_116', '29_37_117', '5_6_118', '38_39_119', '39_40_120',
       '39_41_121', '40_41_122', '42_43_123', '33_42_124', '43_44_125',
       '44_45_126', '45_46_127', '45_47_128', '7_8_129', '46_48_130',
       '41_48_131', '41_48_132', '44_48_133', '47_48_134', '48_49_135',
       '48_50_136', '50_51_137', '51_52_138', '52_53_139', '8_9_140',
       '48_53_141', '48_53_142', '53_54_143', '53_55_144', '54_55_145',
       '55_56_146', '49_56_147', '55_57_148', '50_57_149', '53_58_150',
       '3_10_151', '55_58_152', '55_58_153', '54_58_154', '58_59_155',
       '58_60_156', '59_60_157', '59_61_158', '60_61_159', '62_63_160',
       '37_64_161', '4_10_162', '63_64_163', '48_65_164', '48_65_165',
       '61_65_166', '61_66_167', '65_66_168', '46_68_169', '48_68_170',
       '68_69_171', '23_69_172', '7_4_173', '25_24_174', '80_79_175',
       '86_85_176', '115_67_177', '29_16_178', '37_36_179', '62_58_180',
       '63_60_181', '64_65_182', '64_67_183', '67_68_184', '80_67_185']

In [3]:
class Evaluator(object):
    def __init__(self, env, agent, opponent):
        self.env = env
        self.agent = agent
        self.opponent = opponent

    def run(self, opponent, num_episodes):
        steps_buffer = []
        rewards_buffer = []

        for i_episode in range(num_episodes):
            _ = self.env.reset()
            if opponent:
                opponent.reset()
            max_day = (
                self.env.chronics_handler.max_timestep() - MAX_TIMESTEP) // 288
            start_timestep = np.random.randint(
                max_day) * 288 - 1  # start at 00:00
            if start_timestep > 0:
                self.env.fast_forward_chronics(start_timestep)

            obs = self.env.get_obs()
            done = False
            steps = 0
            rewards = 0
            while not done:
                action = self.agent.act(obs, None, None)
                obs, reward, done, info = self.env.step(action)
                assert not info['is_illegal'] and not info['is_ambiguous']
                rewards += reward
                steps += 1
                
                if done: # to prevent opponent from taking action on finished episode
                    break
                
                if opponent:
                    opponent.take_step(obs)
                    if opponent.remaining_time >= 0:
                        obs.time_before_cooldown_line[opponent.attack_line] = opponent.remaining_time
                        opponent.remaining_time -= 1
                    else: # attack (only one disconnection at a time)
                        response = opponent.attack(obs)
                        if response is not None:
                            attack, a = response
                            obs, opp_reward, done, info = env.step(attack)
                            opponent.tell_attack_continues()
                
                if steps >= MAX_TIMESTEP:
                    break
            steps_buffer.append(steps)
            rewards_buffer.append(rewards)
            print(f'Episode {i_episode+1}/{num_episodes} - Reward: {rewards:.2f}, Num Steps: {steps}')

        return np.mean(steps_buffer), np.mean(rewards_buffer)

## DoNothing Opponent

In [3]:
num_episodes = 10
env_dir=None
# how frequent the attack is.
# after each attack, next_attack_time is set to 1 + rnadint(attack_period
attack_period = 20
# how long the line is cooled down for after attack
attack_duration = 10

warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)

warnings.filterwarnings("default")
agent = Track2PowerNetAgent(env.action_space)
opponent = DoNothingOpponent(env.observation_space, env.action_space)
evaluator = Evaluator(env, agent, opponent)

mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print('num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
    num_episodes, mean_rewards, mean_steps))

[32m[05-10 17:03:47 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-10 17:03:47 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/10 - Reward: 1778846.55, Num Steps: 2016
Episode 2/10 - Reward: 1809349.58, Num Steps: 2016
Episode 3/10 - Reward: 1987477.34, Num Steps: 2016
Episode 4/10 - Reward: 2086845.85, Num Steps: 2016
Episode 5/10 - Reward: 1499925.76, Num Steps: 2016
Episode 6/10 - Reward: 1728951.19, Num Steps: 2016
Episode 7/10 - Reward: 1840716.78, Num Steps: 2016
Episode 8/10 - Reward: 1937603.70, Num Steps: 2016
Episode 9/10 - Reward: 1936437.79, Num Steps: 1862
Episode 10/10 - Reward: 1617793.14, Num Steps: 2016
num_episodes: 10, mean_reward: 1822394.8, mean_steps: 2000.6


## Random Opponent

### Attack duration 10 Attack Period 20

In [3]:
num_episodes = 10
env_dir=None
# how frequent the attack is.
# after each attack, next_attack_time is set to 1 + rnadint(attack_period
attack_period = 20
# how long the line is cooled down for after attack
attack_duration = 10

warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)

warnings.filterwarnings("default")
agent = Track2PowerNetAgent(env.action_space)
opponent = RandomOpponent(env.observation_space, env.action_space,
                          lines_to_attack=LINES, attack_period=attack_period,
                          attack_duration=attack_duration)
evaluator = Evaluator(env, agent, opponent)


mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print('num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
    num_episodes, mean_rewards, mean_steps))

[32m[05-10 17:00:28 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-10 17:00:28 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/10 - Reward: 16238.20, Num Steps: 19
Episode 2/10 - Reward: 488211.41, Num Steps: 495
Episode 3/10 - Reward: 3972.33, Num Steps: 5
Episode 4/10 - Reward: 163708.61, Num Steps: 121
Episode 5/10 - Reward: 60810.63, Num Steps: 77
Episode 6/10 - Reward: 16531.25, Num Steps: 17
Episode 7/10 - Reward: 44264.17, Num Steps: 46
Episode 8/10 - Reward: 125948.18, Num Steps: 121
Episode 9/10 - Reward: 77563.35, Num Steps: 67
Episode 10/10 - Reward: 98247.23, Num Steps: 121
num_episodes: 10, mean_reward: 109549.5, mean_steps: 108.9


### Attack duration 10 Attack Period 40

In [4]:
num_episodes = 10
env_dir=None
# how frequent the attack is.
# after each attack, next_attack_time is set to 1 + rnadint(attack_period
attack_period = 20
# how long the line is cooled down for after attack
attack_duration = 10

warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)

warnings.filterwarnings("default")
agent = Track2PowerNetAgent(env.action_space)
opponent = RandomOpponent(env.observation_space, env.action_space,
                          lines_to_attack=LINES, attack_period=attack_period,
                          attack_duration=attack_duration)
evaluator = Evaluator(env, agent, opponent)


mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print('num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
    num_episodes, mean_rewards, mean_steps))

[32m[05-10 17:23:16 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-10 17:23:16 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/10 - Reward: 374389.35, Num Steps: 433
Episode 2/10 - Reward: 42834.84, Num Steps: 45
Episode 3/10 - Reward: 15466.51, Num Steps: 15
Episode 4/10 - Reward: 330309.99, Num Steps: 296
Episode 5/10 - Reward: 113171.64, Num Steps: 139
Episode 6/10 - Reward: 30503.17, Num Steps: 33
Episode 7/10 - Reward: 25215.44, Num Steps: 27
Episode 8/10 - Reward: 747448.00, Num Steps: 801
Episode 9/10 - Reward: 67531.68, Num Steps: 56
Episode 10/10 - Reward: 65596.09, Num Steps: 79
num_episodes: 10, mean_reward: 181246.7, mean_steps: 192.4


## Weighted Random Opponent

In [4]:
num_episodes = 10
env_dir=None
# how frequent the attack is.
# after each attack, next_attack_time is set to 1 + rnadint(attack_period
attack_period = 20
# how long the line is cooled down for after attack
attack_duration = 10

warnings.filterwarnings("ignore")
backend = LightSimBackend()
env = grid2op.make("l2rpn_neurips_2020_track2_small", backend=backend)

warnings.filterwarnings("default")
agent = Track2PowerNetAgent(env.action_space)
opponent = WeightedRandomOpponent(env.observation_space, env.action_space,
                          lines_to_attack=LINES, attack_period=attack_period,
                          attack_duration=attack_duration)
evaluator = Evaluator(env, agent, opponent)

mean_steps, mean_rewards = evaluator.run(opponent, num_episodes)
print('num_episodes: {}, mean_reward: {:.1f}, mean_steps: {:.1f}'.format(
    num_episodes, mean_rewards, mean_steps))

[32m[05-10 17:33:31 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
[32m[05-10 17:33:32 MainThread @machine_info.py:91][0m Cannot find available GPU devices, using CPU or other devices now.
Episode 1/10 - Reward: 325071.60, Num Steps: 373
Episode 2/10 - Reward: 89495.40, Num Steps: 94
Episode 3/10 - Reward: 23506.55, Num Steps: 20
Episode 4/10 - Reward: 413562.58, Num Steps: 377
Episode 5/10 - Reward: 202184.94, Num Steps: 273
Episode 6/10 - Reward: 43734.54, Num Steps: 49
Episode 7/10 - Reward: 66632.87, Num Steps: 61
Episode 8/10 - Reward: 391762.96, Num Steps: 339
Episode 9/10 - Reward: 6598.69, Num Steps: 6
Episode 10/10 - Reward: 11789.19, Num Steps: 16
num_episodes: 10, mean_reward: 157433.9, mean_steps: 160.8
