In [1]:
import os
import json
import math
import numpy as np
import tensorflow as tf
import torch

import grid2op
from grid2op import make
from grid2op.Runner import Runner
from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward

from kaist_agent.Kaist import Kaist

from simple_opponents.random_opponent import RandomOpponent, WeightedRandomOpponent
from d3qn.adversary import D3QN_Opponent

In [2]:
def evaluate(env, agent, opponent, n_episodes, max_steps, verbose=False):
    """Roll out ``agent`` in ``env`` for several episodes, optionally under attack.

    Each loop iteration lets the agent act once through ``env.step``. If an
    opponent is supplied and the episode is still running, the opponent then
    gets a turn:

    * while ``opponent.remaining_time >= 0`` the remaining time is written
      into ``obs.time_before_cooldown_line`` at ``opponent.attack_line``, the
      counter is decremented and ``opponent.skip_attack(obs)`` is called;
    * otherwise ``opponent.attack(obs)`` is queried and, when it returns a
      non-``None`` pair, its first element is applied with a second
      ``env.step`` followed by ``opponent.tell_attack_continues``.

    Only rewards returned by the agent's own ``env.step`` are accumulated; the
    reward returned by an attack step is discarded.

    Args:
        env: environment exposing ``reset()`` and ``step(action)``, the latter
            returning ``(obs, reward, done, info)``.
        agent: object exposing ``reset(obs)`` and ``act(obs, reward, done)``.
        opponent: attacker object, or a falsy value (e.g. ``None``) to run
            without attacks.
        n_episodes: number of episodes to run.
        max_steps: upper bound on loop iterations per episode.
        verbose: when true, print one summary line per episode once all
            episodes have finished.

    Returns:
        ``(rewards, steps)``: two lists with one entry per episode. ``steps``
        counts the iterations that completed without ``done`` being raised.
    """
    episode_rewards, episode_steps = [], []

    for _ in range(n_episodes):
        obs = env.reset()
        agent.reset(obs)
        if opponent:
            opponent.reset(obs)

        survived = 0
        cumulative_reward = 0
        while survived < max_steps:
            # Agent's turn.
            action = agent.act(obs, None, None)
            obs, reward, done, _ = env.step(action)
            cumulative_reward += reward

            # Opponent's turn, skipped when the agent's move ended the episode.
            if not done and opponent:
                if opponent.remaining_time >= 0:
                    # An attack is still in progress: expose its remaining
                    # duration on the observation and let the opponent idle.
                    obs.time_before_cooldown_line[opponent.attack_line] = opponent.remaining_time
                    opponent.remaining_time -= 1
                    opponent.skip_attack(obs)
                else:
                    response = opponent.attack(obs)
                    if response is not None:
                        attack, _ = response
                        obs, _, done, _ = env.step(attack)
                        opponent.tell_attack_continues(None, None, None, None)

            if done:
                break
            survived += 1

        episode_rewards.append(cumulative_reward)
        episode_steps.append(survived)

    if verbose:
        for idx, (ep_reward, ep_steps) in enumerate(zip(episode_rewards, episode_steps), start=1):
            print(f'Episode {idx}/{n_episodes} - Reward: {ep_reward:.2f}\t Number of steps survived: {ep_steps}')

    return episode_rewards, episode_steps

### Evaluate with no opponent

In [11]:
# --- Environment -----------------------------------------------------------
# L2RPNSandBoxScore is passed as the reward class; L2RPNReward is registered
# as an extra reward under the name "reward".
env_name = 'l2rpn_wcci_2020'
extra_rewards = {"reward": L2RPNReward}
env = make(env_name, reward_class=L2RPNSandBoxScore, other_rewards=extra_rewards)

# --- Agent -----------------------------------------------------------------
agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
param_path = os.path.join(data_dir, 'param.json')
with open(param_path, 'r', encoding='utf-8') as f:
    param = json.load(f)

# Normalisation statistics are loaded onto param['device'] and then moved to CPU.
mean_path = os.path.join(data_dir, 'mean.pt')
std_path = os.path.join(data_dir, 'std.pt')
state_mean = torch.load(mean_path, map_location=param['device']).cpu()
state_std = torch.load(std_path, map_location=param['device']).cpu()

# Entries of the std tensor below 1e-5 are replaced by 1
# (presumably to avoid dividing by ~0 when normalising -- confirm in Kaist).
state_std = state_std.masked_fill(state_std < 1e-5, 1.)

# Everything past the summed sizes of the first 20 observation attributes
# gets mean 0 / std 1 in row 0.
n_normalized = sum(env.observation_space.shape[:20])
state_mean[0, n_normalized:] = 0
state_std[0, n_normalized:] = 1

agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)

# --- Simulation ------------------------------------------------------------
n_episodes = 20
n_max_steps = 150

# Passing None as the opponent runs the agent without attacks.
reward_arr, n_survive_steps_arr = evaluate(env, agent, None, n_episodes, n_max_steps, verbose=True)

mean_reward = np.mean(reward_arr)
mean_steps = np.mean(n_survive_steps_arr)
print()
print('Average reward: {:.2f}\t Average number of steps survived: {}'.format(mean_reward, mean_steps))

O: 72 S: 128 A: 108 (19)
Episode 1/20 - Reward: 53077.66	 Number of steps survived: 150
Episode 2/20 - Reward: 43475.25	 Number of steps survived: 150
Episode 3/20 - Reward: 50524.64	 Number of steps survived: 150
Episode 4/20 - Reward: 55378.95	 Number of steps survived: 150
Episode 5/20 - Reward: 44330.47	 Number of steps survived: 150
Episode 6/20 - Reward: 50290.00	 Number of steps survived: 150
Episode 7/20 - Reward: 62669.68	 Number of steps survived: 150
Episode 8/20 - Reward: 53732.96	 Number of steps survived: 150
Episode 9/20 - Reward: 49542.81	 Number of steps survived: 150
Episode 10/20 - Reward: 47788.03	 Number of steps survived: 150
Episode 11/20 - Reward: 53296.05	 Number of steps survived: 150
Episode 12/20 - Reward: 44505.76	 Number of steps survived: 150
Episode 13/20 - Reward: 45917.02	 Number of steps survived: 150
Episode 14/20 - Reward: 44120.46	 Number of steps survived: 150
Episode 15/20 - Reward: 51244.66	 Number of steps survived: 150
Episode 16/20 - Reward: 

### Evaluate with random opponent

In [12]:
# --- Environment -----------------------------------------------------------
# L2RPNSandBoxScore is passed as the reward class; L2RPNReward is registered
# as an extra reward under the name "reward".
env_name = 'l2rpn_wcci_2020'
env = make(env_name, reward_class=L2RPNSandBoxScore,
           other_rewards={
               "reward": L2RPNReward
           })

# --- Agent -----------------------------------------------------------------
agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
with open(os.path.join(data_dir, 'param.json'), 'r', encoding='utf-8') as f:
    param = json.load(f)

# Normalisation statistics are loaded onto param['device'] and then moved to CPU.
state_mean = torch.load(os.path.join(data_dir, 'mean.pt'), map_location=param['device']).cpu()
state_std = torch.load(os.path.join(data_dir, 'std.pt'), map_location=param['device']).cpu()
# Entries of the std tensor below 1e-5 are replaced by 1.
state_std = state_std.masked_fill(state_std < 1e-5, 1.)
# Everything past the summed sizes of the first 20 observation attributes
# gets mean 0 / std 1 in row 0.
n_normalized = sum(env.observation_space.shape[:20])
state_mean[0, n_normalized:] = 0
state_std[0, n_normalized:] = 1
agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)

# --- Opponent hyperparameters ----------------------------------------------
attack_period = 20
attack_duration = 10

# Names of the power lines handed to the opponent as `lines_attacked`.
lines = ['0_4_2', '10_11_11', '11_12_13', '12_13_14', '12_16_20',
         '13_14_15', '13_15_16', '14_16_17', '14_35_53', '15_16_21',
         '16_17_22', '16_18_23', '16_21_27', '16_21_28', '16_33_48',
         '16_33_49', '16_35_54', '17_24_33', '18_19_24', '18_25_35',
         '19_20_25', '1_10_12', '1_3_3', '1_4_4', '20_21_26',
         '21_22_29', '21_23_30', '21_26_36', '22_23_31', '22_26_39',
         '23_24_32', '23_25_34', '23_26_37', '23_26_38', '26_27_40',
         '26_28_41', '26_30_56', '27_28_42', '27_29_43', '28_29_44',
         '28_31_57', '29_33_50', '29_34_51', '2_3_0', '2_4_1',
         '30_31_45', '31_32_47', '32_33_58', '33_34_52', '4_5_55',
         '4_6_5', '4_7_6', '5_32_46', '6_7_7', '7_8_8',
         '7_9_9', '8_9_10', '9_16_18', '9_16_19']

# `attack_duration` is forwarded from the variable above instead of repeating
# the literal 10, so changing the hyperparameter actually takes effect.
opponent = RandomOpponent(env.observation_space, env.action_space, lines_attacked=lines,
                          attack_period=attack_period, attack_duration=attack_duration)

# --- Simulation hyperparameters --------------------------------------------
n_episodes = 20
n_max_steps = 150

reward_arr, n_survive_steps_arr = evaluate(env, agent, opponent, n_episodes, n_max_steps, verbose=True)
print()
print('Average reward: {:.2f}\t Average number of steps survived: {}'.format(np.mean(reward_arr), np.mean(n_survive_steps_arr)))

O: 72 S: 128 A: 108 (19)
Episode 1/20 - Reward: 41048.85	 Number of steps survived: 100
Episode 2/20 - Reward: 49615.94	 Number of steps survived: 150
Episode 3/20 - Reward: 52418.49	 Number of steps survived: 150
Episode 4/20 - Reward: 11846.12	 Number of steps survived: 21
Episode 5/20 - Reward: 44652.72	 Number of steps survived: 150
Episode 6/20 - Reward: 10040.69	 Number of steps survived: 23
Episode 7/20 - Reward: 63276.04	 Number of steps survived: 150
Episode 8/20 - Reward: 48217.50	 Number of steps survived: 125
Episode 9/20 - Reward: 32036.53	 Number of steps survived: 93
Episode 10/20 - Reward: 8407.49	 Number of steps survived: 19
Episode 11/20 - Reward: 50140.47	 Number of steps survived: 135
Episode 12/20 - Reward: 46000.21	 Number of steps survived: 150
Episode 13/20 - Reward: 49232.10	 Number of steps survived: 150
Episode 14/20 - Reward: 15681.14	 Number of steps survived: 31
Episode 15/20 - Reward: 58623.07	 Number of steps survived: 150
Episode 16/20 - Reward: 47650.

### Evaluate with weighted random opponent

In [13]:
# --- Environment -----------------------------------------------------------
# L2RPNSandBoxScore is passed as the reward class; L2RPNReward is registered
# as an extra reward under the name "reward".
env_name = 'l2rpn_wcci_2020'
env = make(env_name, reward_class=L2RPNSandBoxScore,
           other_rewards={
               "reward": L2RPNReward
           })

# --- Agent -----------------------------------------------------------------
agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
with open(os.path.join(data_dir, 'param.json'), 'r', encoding='utf-8') as f:
    param = json.load(f)

# Normalisation statistics are loaded onto param['device'] and then moved to CPU.
state_mean = torch.load(os.path.join(data_dir, 'mean.pt'), map_location=param['device']).cpu()
state_std = torch.load(os.path.join(data_dir, 'std.pt'), map_location=param['device']).cpu()
# Entries of the std tensor below 1e-5 are replaced by 1.
state_std = state_std.masked_fill(state_std < 1e-5, 1.)
# Everything past the summed sizes of the first 20 observation attributes
# gets mean 0 / std 1 in row 0.
n_normalized = sum(env.observation_space.shape[:20])
state_mean[0, n_normalized:] = 0
state_std[0, n_normalized:] = 1
agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)

# --- Opponent hyperparameters ----------------------------------------------
attack_period = 20
attack_duration = 10

# Names of the power lines handed to the opponent as `lines_attacked`.
lines = ['0_4_2', '10_11_11', '11_12_13', '12_13_14', '12_16_20',
         '13_14_15', '13_15_16', '14_16_17', '14_35_53', '15_16_21',
         '16_17_22', '16_18_23', '16_21_27', '16_21_28', '16_33_48',
         '16_33_49', '16_35_54', '17_24_33', '18_19_24', '18_25_35',
         '19_20_25', '1_10_12', '1_3_3', '1_4_4', '20_21_26',
         '21_22_29', '21_23_30', '21_26_36', '22_23_31', '22_26_39',
         '23_24_32', '23_25_34', '23_26_37', '23_26_38', '26_27_40',
         '26_28_41', '26_30_56', '27_28_42', '27_29_43', '28_29_44',
         '28_31_57', '29_33_50', '29_34_51', '2_3_0', '2_4_1',
         '30_31_45', '31_32_47', '32_33_58', '33_34_52', '4_5_55',
         '4_6_5', '4_7_6', '5_32_46', '6_7_7', '7_8_8',
         '7_9_9', '8_9_10', '9_16_18', '9_16_19']

# `attack_duration` is forwarded from the variable above instead of repeating
# the literal 10, so changing the hyperparameter actually takes effect.
opponent = WeightedRandomOpponent(env.observation_space, env.action_space, lines_attacked=lines,
                                  attack_period=attack_period, attack_duration=attack_duration)

# --- Simulation hyperparameters --------------------------------------------
n_episodes = 20
n_max_steps = 150

reward_arr, n_survive_steps_arr = evaluate(env, agent, opponent, n_episodes, n_max_steps, verbose=True)
print()
print('Average reward: {:.2f}\t Average number of steps survived: {}'.format(np.mean(reward_arr), np.mean(n_survive_steps_arr)))

O: 72 S: 128 A: 108 (19)
Episode 1/20 - Reward: 34898.01	 Number of steps survived: 92
Episode 2/20 - Reward: 23998.14	 Number of steps survived: 64
Episode 3/20 - Reward: 40293.94	 Number of steps survived: 116
Episode 4/20 - Reward: 55989.25	 Number of steps survived: 150
Episode 5/20 - Reward: 21619.62	 Number of steps survived: 64
Episode 6/20 - Reward: 36729.14	 Number of steps survived: 105
Episode 7/20 - Reward: 64594.12	 Number of steps survived: 150
Episode 8/20 - Reward: 36117.51	 Number of steps survived: 100
Episode 9/20 - Reward: 52576.48	 Number of steps survived: 150
Episode 10/20 - Reward: 8267.36	 Number of steps survived: 14
Episode 11/20 - Reward: 54584.61	 Number of steps survived: 150
Episode 12/20 - Reward: 20399.39	 Number of steps survived: 60
Episode 13/20 - Reward: 14112.05	 Number of steps survived: 16
Episode 14/20 - Reward: 44941.41	 Number of steps survived: 150
Episode 15/20 - Reward: 56110.97	 Number of steps survived: 148
Episode 16/20 - Reward: 8135.84

### Evaluate with untrained D3QN opponent

In [3]:
# --- Environment -----------------------------------------------------------
# L2RPNSandBoxScore is passed as the reward class; L2RPNReward is registered
# as an extra reward under the name "reward".
env_name = 'l2rpn_wcci_2020'
extra_rewards = {"reward": L2RPNReward}
env = make(env_name, reward_class=L2RPNSandBoxScore, other_rewards=extra_rewards)

# --- Agent -----------------------------------------------------------------
agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
param_path = os.path.join(data_dir, 'param.json')
with open(param_path, 'r', encoding='utf-8') as f:
    param = json.load(f)

# Normalisation statistics are loaded onto param['device'] and then moved to CPU.
mean_path = os.path.join(data_dir, 'mean.pt')
std_path = os.path.join(data_dir, 'std.pt')
state_mean = torch.load(mean_path, map_location=param['device']).cpu()
state_std = torch.load(std_path, map_location=param['device']).cpu()

# Entries of the std tensor below 1e-5 are replaced by 1.
state_std = state_std.masked_fill(state_std < 1e-5, 1.)

# Everything past the summed sizes of the first 20 observation attributes
# gets mean 0 / std 1 in row 0.
n_normalized = sum(env.observation_space.shape[:20])
state_mean[0, n_normalized:] = 0
state_std[0, n_normalized:] = 1

agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)

# --- Opponent hyperparameters ----------------------------------------------
attack_period = 20
attack_duration = 10

# Names of the power lines handed to the opponent as `lines_attacked`.
lines = [
    '0_4_2', '10_11_11', '11_12_13', '12_13_14', '12_16_20',
    '13_14_15', '13_15_16', '14_16_17', '14_35_53', '15_16_21',
    '16_17_22', '16_18_23', '16_21_27', '16_21_28', '16_33_48',
    '16_33_49', '16_35_54', '17_24_33', '18_19_24', '18_25_35',
    '19_20_25', '1_10_12', '1_3_3', '1_4_4', '20_21_26',
    '21_22_29', '21_23_30', '21_26_36', '22_23_31', '22_26_39',
    '23_24_32', '23_25_34', '23_26_37', '23_26_38', '26_27_40',
    '26_28_41', '26_30_56', '27_28_42', '27_29_43', '28_29_44',
    '28_31_57', '29_33_50', '29_34_51', '2_3_0', '2_4_1',
    '30_31_45', '31_32_47', '32_33_58', '33_34_52', '4_5_55',
    '4_6_5', '4_7_6', '5_32_46', '6_7_7', '7_8_8',
    '7_9_9', '8_9_10', '9_16_18', '9_16_19',
]

# No weights are loaded here, so the opponent is evaluated as constructed.
# NOTE(review): the positional order is (action_space, observation_space),
# the reverse of the RandomOpponent / WeightedRandomOpponent calls in this
# notebook -- confirm against D3QN_Opponent's signature.
opponent = D3QN_Opponent(
    env.action_space,
    env.observation_space,
    lines_attacked=lines,
    attack_period=attack_period,
    attack_duration=attack_duration,
    is_training=False,
)

# --- Simulation hyperparameters --------------------------------------------
n_episodes = 20
n_max_steps = 150

reward_arr, n_survive_steps_arr = evaluate(env, agent, opponent, n_episodes, n_max_steps, verbose=True)

mean_reward = np.mean(reward_arr)
mean_steps = np.mean(n_survive_steps_arr)
print()
print('Average reward: {:.2f}\t Average number of steps survived: {}'.format(mean_reward, mean_steps))

O: 72 S: 128 A: 108 (19)
['2_3_0' '2_4_1' '0_4_2' '1_3_3' '1_4_4' '4_6_5' '4_7_6' '6_7_7' '7_8_8'
 '7_9_9' '8_9_10' '10_11_11' '1_10_12' '11_12_13' '12_13_14' '13_14_15'
 '13_15_16' '14_16_17' '9_16_18' '9_16_19' '12_16_20' '15_16_21'
 '16_17_22' '16_18_23' '18_19_24' '19_20_25' '20_21_26' '16_21_27'
 '16_21_28' '21_22_29' '21_23_30' '22_23_31' '23_24_32' '17_24_33'
 '23_25_34' '18_25_35' '21_26_36' '23_26_37' '23_26_38' '22_26_39'
 '26_27_40' '26_28_41' '27_28_42' '27_29_43' '28_29_44' '30_31_45'
 '5_32_46' '31_32_47' '16_33_48' '16_33_49' '29_33_50' '29_34_51'
 '33_34_52' '14_35_53' '16_35_54' '4_5_55' '26_30_56' '28_31_57'
 '32_33_58']
Episode 1/20 - Reward: 48441.76	 Number of steps survived: 150
Episode 2/20 - Reward: 42313.91	 Number of steps survived: 150
Episode 3/20 - Reward: 47742.77	 Number of steps survived: 150
Episode 4/20 - Reward: 50194.99	 Number of steps survived: 150
Episode 5/20 - Reward: 39644.38	 Number of steps survived: 150
Episode 6/20 - Reward: 46314.69	 Numbe