In [5]:
import os
import json
import math
import numpy as np
import tensorflow as tf
import torch

import grid2op
from d3qn.adversary import D3QN_Opponent
from grid2op.Agent import DoNothingAgent
from grid2op.Action import TopologyChangeAndDispatchAction
from grid2op.Reward import CombinedScaledReward, L2RPNSandBoxScore, L2RPNReward, GameplayReward
from l2rpn_baselines.DoubleDuelingDQN.DoubleDuelingDQNConfig import DoubleDuelingDQNConfig as cfg

from kaist_agent.Kaist import Kaist

In [6]:
MAX_TIMESTEP = 7 * 288

def train_adversary(env, agent, opponent, num_pre_training_steps, n_iter, save_path, log_path):
    # Make sure we can fill the experience buffer
    if num_pre_training_steps < opponent.batch_size * opponent.num_frames:
        num_pre_training_steps = opponent.batch_size * opponent.num_frames
        
    # Loop vars
    num_training_steps = n_iter
    num_steps = num_pre_training_steps + num_training_steps
    step = 0
    alive_steps = 0
    total_reward = 0
    done = True
    print(f"Total number of steps: {num_steps}")

    # Create file system related vars
    logpath = os.path.join(log_path, opponent.name)
    os.makedirs(save_path, exist_ok=True)
    modelpath = os.path.join(save_path, opponent.name + ".h5")
    opponent.tf_writer = tf.summary.create_file_writer(logpath, name=opponent.name)
    opponent._save_hyperparameters(save_path, env, num_steps)
    
    while step < num_steps:
        # Init first time or new episode
        if done:
            new_obs = env.reset() # This shouldn't raise
            agent.reset(new_obs)
            opponent.reset(new_obs)
            done = False
        if cfg.VERBOSE and step % 1000 == 0:
            print("Step [{}] -- Random [{}]".format(step, opponent.epsilon))

        # Save current observation to stacking buffer
        opponent._save_current_frame(opponent.state)

        # Execute attack if allowed
        if step <= num_pre_training_steps:
            opponent.remaining_time = 0
            attack, a = opponent._do_nothing, 0
        else:
            attack, a = opponent.attack(new_obs)

        if a != 0:
#             print(f'ATTACK step {step}: disconnected {a}')
            attack_obs, opp_reward, done, info = env.step(attack)
            if info["is_illegal"] or info["is_ambiguous"] or \
               info["is_dispatching_illegal"] or info["is_illegal_reco"]:
                if cfg.VERBOSE:
                    print(attack, info)
            new_obs = attack_obs
            opponent.tell_attack_continues(None, None, None, None)

        while opponent.remaining_time >= 0 and not done:
            new_obs.time_before_cooldown_line[opponent.attack_line] = opponent.remaining_time
            response = agent.act(new_obs, None, None)
            new_obs, reward, done, info = env.step(response)
            opponent.remaining_time -= 1
            total_reward += reward
            alive_steps += 1
        
        # Save new observation to stacking buffer
        new_state = opponent.convert_obs(new_obs)
        opponent._save_next_frame(new_state)

        # Save to experience buffer
        if len(opponent.frames2) == opponent.num_frames:
            opponent.per_buffer.add(np.array(opponent.frames),
                                a, -1 * reward,
                                np.array(opponent.frames2),
                                opponent.done)


        # Perform training when we have enough experience in buffer
        if step >= num_pre_training_steps:
            training_step = step - num_pre_training_steps
            # Decay chance of random action
            opponent.epsilon = opponent._adaptive_epsilon_decay(training_step)

            # Perform training at given frequency
            if step % cfg.UPDATE_FREQ == 0 and \
               len(opponent.per_buffer) >= opponent.batch_size:
                # Perform training
                opponent._batch_train(training_step, step)

                if cfg.UPDATE_TARGET_SOFT_TAU > 0.0:
                    tau = cfg.UPDATE_TARGET_SOFT_TAU
                    # Update target network towards primary network
                    opponent.policy_net.update_target_soft(opponent.target_net.model, tau)

            # Every UPDATE_TARGET_HARD_FREQ trainings, update target completely
            if cfg.UPDATE_TARGET_HARD_FREQ > 0 and \
               step % (cfg.UPDATE_FREQ * cfg.UPDATE_TARGET_HARD_FREQ) == 0:
                opponent.policy_net.update_target_hard(opponent.target_net.model)
        
        if done:
            opponent.epoch_rewards.append(-1 * total_reward)
            opponent.epoch_alive.append(alive_steps)
            if cfg.VERBOSE and step > num_pre_training_steps:
                print("step {}: Agent survived [{}] steps with reward {}".format(step, alive_steps, total_reward))
            alive_steps = 0
            total_reward = 0         
        else:
            alive_steps += 1
            
        ######## After Each Step #######
        if step > 0 and step % 2000 == 0: # save network every 5000 iters
            opponent.save(modelpath)
        step += 1
        # Make new obs the current obs
        opponent.obs = new_obs
        opponent.state = new_state

    # Save model after all steps
    opponent.save(modelpath)

In [7]:
env_name = 'l2rpn_wcci_2020'
env = grid2op.make(env_name, reward_class=CombinedScaledReward)

# Agent 
agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
with open(os.path.join(data_dir, 'param.json'), 'r', encoding='utf-8') as f:
    param = json.load(f)
print(param)
state_mean = torch.load(os.path.join(data_dir, 'mean.pt'), map_location=param['device']).cpu()
state_std = torch.load(os.path.join(data_dir, 'std.pt'), map_location=param['device']).cpu()
state_std = state_std.masked_fill(state_std<1e-5, 1.)
state_mean[0, sum(env.observation_space.shape[:20]):] = 0
state_std[0, sum(env.observation_space.shape[:20]):] = 1
agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)

# Opponent 
opponent_name = "D3QN_kaist"
num_pre_training_steps = 256
learning_rate = 5e-5
initial_epsilon = 0.99
final_epsilon = 0.01
decay_epsilon = 5000
attack_period = 20
lines = ['0_4_2', '10_11_11', '11_12_13', '12_13_14', '12_16_20', 
            '13_14_15', '13_15_16', '14_16_17', '14_35_53', '15_16_21', 
            '16_17_22', '16_18_23', '16_21_27', '16_21_28', '16_33_48', 
            '16_33_49', '16_35_54', '17_24_33', '18_19_24', '18_25_35', 
            '19_20_25', '1_10_12', '1_3_3', '1_4_4', '20_21_26', 
            '21_22_29', '21_23_30', '21_26_36', '22_23_31', '22_26_39', 
            '23_24_32', '23_25_34', '23_26_37', '23_26_38', '26_27_40', 
            '26_28_41', '26_30_56', '27_28_42', '27_29_43', '28_29_44', 
            '28_31_57', '29_33_50', '29_34_51', '2_3_0', '2_4_1', 
            '30_31_45', '31_32_47', '32_33_58', '33_34_52', '4_5_55', 
            '4_6_5', '4_7_6', '5_32_46', '6_7_7', '7_8_8', 
            '7_9_9', '8_9_10', '9_16_18', '9_16_19']

opponent = D3QN_Opponent(env.action_space, env.observation_space, lines_attacked=lines, attack_period=attack_period,
            name=opponent_name, is_training=True, learning_rate=learning_rate,
            initial_epsilon=initial_epsilon, final_epsilon=final_epsilon, decay_epsilon=decay_epsilon)

{'head_number': 8, 'n_history': 12, 'state_dim': 128, 'dropout': 0.0, 'sim_trial': 15, 'threshold': 0.35, 'max_low_len': 19, 'danger': 0.9, 'mask': 3, 'mask_hi': 19, 'use_order': True, 'device': 'cpu'}
O: 72 S: 128 A: 108 (19)
['2_3_0' '2_4_1' '0_4_2' '1_3_3' '1_4_4' '4_6_5' '4_7_6' '6_7_7' '7_8_8'
 '7_9_9' '8_9_10' '10_11_11' '1_10_12' '11_12_13' '12_13_14' '13_14_15'
 '13_15_16' '14_16_17' '9_16_18' '9_16_19' '12_16_20' '15_16_21'
 '16_17_22' '16_18_23' '18_19_24' '19_20_25' '20_21_26' '16_21_27'
 '16_21_28' '21_22_29' '21_23_30' '22_23_31' '23_24_32' '17_24_33'
 '23_25_34' '18_25_35' '21_26_36' '23_26_37' '23_26_38' '22_26_39'
 '26_27_40' '26_28_41' '27_28_42' '27_29_43' '28_29_44' '30_31_45'
 '5_32_46' '31_32_47' '16_33_48' '16_33_49' '29_33_50' '29_34_51'
 '33_34_52' '14_35_53' '16_35_54' '4_5_55' '26_30_56' '28_31_57'
 '32_33_58']


In [8]:
# Training
n_iter = 15000
# Register custom reward for training
cr = env._reward_helper.template_reward
#cr.addReward("overflow", CloseToOverflowReward(), 1.0)
cr.addReward("game", GameplayReward(), 1.0)
#cr.addReward("recolines", LinesReconnectedReward(), 1.0)
cr.addReward("l2rpn", L2RPNReward(), 2.0/float(env.n_line))
# Initialize custom rewards
cr.initialize(env)
# Set reward range to something managable
cr.set_range(-1.0, 1.0)

save_path = "kaist_agent_D3QN_opponent_{}_{}".format(attack_period, n_iter)
log_path="tf_logs_D3QN"

train_adversary(env, agent, opponent, num_pre_training_steps, n_iter, save_path, log_path)

Total number of steps: 15256
Step [0] -- Random [0.99]
step 258: Agent survived [529] steps with reward 235.68253469467163
step 268: Agent survived [86] steps with reward 65.98633766174316
loss = 149333.4
step 282: Agent survived [134] steps with reward 96.66918355226517
step 292: Agent survived [83] steps with reward 57.21343821287155
step 301: Agent survived [77] steps with reward 58.479610204696655
step 312: Agent survived [98] steps with reward 70.23024147748947
step 322: Agent survived [86] steps with reward 61.609327018260956
step 328: Agent survived [35] steps with reward 18.472240567207336
step 332: Agent survived [14] steps with reward 9.432526111602783
loss = 23115.06
step 347: Agent survived [146] steps with reward 104.84744328260422
step 356: Agent survived [77] steps with reward 52.02473759651184
step 366: Agent survived [83] steps with reward 64.43562787771225
step 384: Agent survived [185] steps with reward 134.53531378507614
step 389: Agent survived [26] steps with rewa

step 1506: Agent survived [146] steps with reward 106.14648914337158
step 1510: Agent survived [14] steps with reward 6.038665354251862
loss = 1215.974
step 1517: Agent survived [47] steps with reward 34.99532473087311
step 1521: Agent survived [14] steps with reward 9.51524555683136
step 1526: Agent survived [29] steps with reward 19.66529369354248
step 1540: Agent survived [134] steps with reward 89.80013287067413
step 1549: Agent survived [80] steps with reward 55.86423188447952
step 1559: Agent survived [77] steps with reward 51.972525238990784
step 1567: Agent survived [65] steps with reward 39.557570934295654
loss = 457.44586
step 1577: Agent survived [83] steps with reward 58.35631710290909
step 1589: Agent survived [110] steps with reward 82.8214710354805
step 1605: Agent survived [158] steps with reward 119.14502531290054
step 1609: Agent survived [17] steps with reward 10.206428050994873
step 1622: Agent survived [119] steps with reward 89.1295291185379
loss = 617.76105
step 

loss = 45680.703
step 2520: Agent survived [62] steps with reward 44.721267104148865
step 2526: Agent survived [44] steps with reward 27.210818827152252
step 2557: Agent survived [329] steps with reward 230.7115796804428
step 2567: Agent survived [92] steps with reward 66.59659349918365
loss = 38646.02
step 2581: Agent survived [131] steps with reward 94.8400582075119
step 2588: Agent survived [50] steps with reward 33.49692577123642
step 2592: Agent survived [14] steps with reward 7.646378755569458
step 2599: Agent survived [50] steps with reward 32.92957329750061
step 2603: Agent survived [14] steps with reward 7.467150092124939
step 2626: Agent survived [244] steps with reward 181.58845072984695
loss = 21307.324
step 2638: Agent survived [108] steps with reward 62.3344669342041
step 2645: Agent survived [50] steps with reward 35.52220398187637
step 2660: Agent survived [143] steps with reward 108.30232632160187
step 2664: Agent survived [14] steps with reward 9.520828127861023
step 

step 3446: Agent survived [26] steps with reward 16.94094157218933
loss = 6821.9004
step 3483: Agent survived [410] steps with reward 296.7837070822716
step 3493: Agent survived [86] steps with reward 61.036910116672516
step 3506: Agent survived [119] steps with reward 83.11754071712494
step 3518: Agent survived [113] steps with reward 82.05240613222122
loss = 8948.52
step 3534: Agent survived [155] steps with reward 113.01788008213043
step 3542: Agent survived [65] steps with reward 44.26642894744873
step 3549: Agent survived [47] steps with reward 34.97865700721741
step 3556: Agent survived [53] steps with reward 32.2965607047081
step 3562: Agent survived [38] steps with reward 26.36246144771576
step 3567: Agent survived [26] steps with reward 16.26090717315674
step 3574: Agent survived [53] steps with reward 35.983735263347626
loss = 6289.642
step 3584: Agent survived [83] steps with reward 56.97989845275879
step 3590: Agent survived [35] steps with reward 23.555492520332336
step 35

step 4456: Agent survived [293] steps with reward 191.5599024295807
step 4467: Agent survived [95] steps with reward 59.83879339694977
loss = 361.48035
step 4489: Agent survived [230] steps with reward 174.67131346464157
step 4498: Agent survived [74] steps with reward 52.754544377326965
step 4509: Agent survived [95] steps with reward 69.732854783535
step 4519: Agent survived [92] steps with reward 66.2640106678009
step 4523: Agent survived [11] steps with reward 4.97458291053772
step 4529: Agent survived [41] steps with reward 26.767952263355255
loss = 111.42146
step 4538: Agent survived [74] steps with reward 47.972006261348724
step 4553: Agent survived [140] steps with reward 103.0408427119255
step 4559: Agent survived [41] steps with reward 28.081408262252808
step 4574: Agent survived [143] steps with reward 92.49928468465805
step 4585: Agent survived [98] steps with reward 68.16373866796494
loss = 300.36154
step 4596: Agent survived [101] steps with reward 66.19759625196457
step 

step 5666: Agent survived [32] steps with reward 7.963361322879791
step 5670: Agent survived [14] steps with reward 6.950901627540588
step 5679: Agent survived [71] steps with reward 46.76843881607056
step 5702: Agent survived [245] steps with reward 162.18231338262558
loss = 3798.476
step 5728: Agent survived [278] steps with reward 174.67952752113342
step 5732: Agent survived [14] steps with reward 6.0708287358284
step 5744: Agent survived [113] steps with reward 68.7426608800888
step 5756: Agent survived [104] steps with reward 73.34037256240845
step 5762: Agent survived [38] steps with reward 26.156467080116272
loss = 1269.0562
step 5770: Agent survived [65] steps with reward 39.135454535484314
step 5774: Agent survived [11] steps with reward 6.13277268409729
step 5781: Agent survived [53] steps with reward 34.15002506971359
step 5785: Agent survived [14] steps with reward 4.576497852802277
step 5789: Agent survived [11] steps with reward 5.6454890966415405
step 5816: Agent survive

step 6718: Agent survived [50] steps with reward 29.795729339122772
loss = 6.8271866
step 6722: Agent survived [14] steps with reward 5.367199540138245
step 6741: Agent survived [194] steps with reward 132.6107196211815
step 6755: Agent survived [140] steps with reward 82.57663637399673
step 6771: Agent survived [149] steps with reward 93.24931406974792
loss = 32.370716
step 6780: Agent survived [77] steps with reward 28.907960653305054
step 6791: Agent survived [98] steps with reward 59.449388802051544
step 6806: Agent survived [149] steps with reward 91.02363342046738
step 6815: Agent survived [71] steps with reward 44.60338068008423
step 6822: Agent survived [53] steps with reward 32.09743148088455
step 6828: Agent survived [35] steps with reward 18.61381822824478
loss = 41.11666
step 6842: Agent survived [137] steps with reward 86.07438093423843
step 6853: Agent survived [95] steps with reward 56.26521706581116
step 6857: Agent survived [20] steps with reward 4.850414037704468
step

loss = 43.793625
step 7456: Agent survived [218] steps with reward 149.8950293660164
step 7476: Agent survived [209] steps with reward 130.17096197605133
step 7491: Agent survived [146] steps with reward 94.51183807849884
step 7502: Agent survived [95] steps with reward 64.26018959283829
loss = 96.315254
step 7515: Agent survived [125] steps with reward 82.04776656627655
step 7519: Agent survived [17] steps with reward 5.873954772949219
step 7538: Agent survived [194] steps with reward 130.3595787882805
step 7554: Agent survived [158] steps with reward 105.69749581813812
loss = 18.38559
step 7568: Agent survived [131] steps with reward 81.50758767127991
step 7573: Agent survived [29] steps with reward 13.161226570606232
step 7580: Agent survived [44] steps with reward 28.53206729888916
step 7591: Agent survived [108] steps with reward 69.91107082366943
step 7596: Agent survived [19] steps with reward 8.025845229625702
step 7609: Agent survived [124] steps with reward 80.91890609264374


loss = 638.0881
step 8348: Agent survived [101] steps with reward 63.38561064004898
step 8355: Agent survived [50] steps with reward 27.9446142911911
step 8368: Agent survived [119] steps with reward 66.75860387086868
step 8379: Agent survived [98] steps with reward 58.98143923282623
step 8390: Agent survived [98] steps with reward 60.77745062112808
loss = 1353.9646
step 8404: Agent survived [131] steps with reward 81.29782432317734
step 8416: Agent survived [111] steps with reward 67.6596302986145
step 8426: Agent survived [92] steps with reward 55.19835305213928
step 8434: Agent survived [58] steps with reward 31.6613672375679
step 8448: Agent survived [137] steps with reward 86.20558816194534
loss = 590.9644
step 8458: Agent survived [83] steps with reward 51.50705373287201
step 8463: Agent survived [26] steps with reward 14.42252779006958
step 8474: Agent survived [106] steps with reward 59.14243745803833
step 8487: Agent survived [119] steps with reward 64.45405209064484
step 8498

step 9002: Agent survived [122] steps with reward 75.81111752986908
step 9006: Agent survived [14] steps with reward 6.734870433807373
step 9011: Agent survived [26] steps with reward 11.718064427375793
step 9015: Agent survived [14] steps with reward 4.919088900089264
loss = 1527.3782
step 9020: Agent survived [26] steps with reward 15.368091464042664
step 9027: Agent survived [47] steps with reward 31.69245845079422
step 9031: Agent survived [17] steps with reward 7.563506245613098
step 9040: Agent survived [74] steps with reward 46.828440964221954
step 9045: Agent survived [26] steps with reward 14.069075167179108
step 9056: Agent survived [101] steps with reward 62.38807338476181
step 9061: Agent survived [23] steps with reward 12.927767038345337
step 9067: Agent survived [38] steps with reward 15.695760905742645
step 9071: Agent survived [14] steps with reward 5.292949736118317
loss = 3665.647
step 9078: Agent survived [53] steps with reward 26.811803579330444
step 9087: Agent sur

step 9734: Agent survived [14] steps with reward 3.5525525212287903
step 9738: Agent survived [11] steps with reward 3.494310975074768
loss = 3601.1409
step 9744: Agent survived [38] steps with reward 21.481897294521332
step 9749: Agent survived [23] steps with reward 11.938518464565277
step 9754: Agent survived [32] steps with reward 15.577053308486938
step 9758: Agent survived [13] steps with reward 3.784546375274658
step 9765: Agent survived [51] steps with reward 20.60986226797104
step 9773: Agent survived [62] steps with reward 34.642871618270874
step 9781: Agent survived [62] steps with reward 36.98805522918701
step 9786: Agent survived [26] steps with reward 14.02959007024765
step 9794: Agent survived [62] steps with reward 37.52257913351059
loss = 1606.5171
step 9804: Agent survived [83] steps with reward 54.55763131380081
step 9808: Agent survived [17] steps with reward 7.025445342063904
step 9813: Agent survived [20] steps with reward 11.487268447875977
step 9817: Agent survi

step 10341: Agent survived [26] steps with reward 11.815520465373993
step 10348: Agent survived [50] steps with reward 31.045689582824707
step 10354: Agent survived [35] steps with reward 16.671833097934723
step 10358: Agent survived [17] steps with reward 5.9510657787323
loss = 1537.319
step 10363: Agent survived [23] steps with reward 13.214049577713013
step 10367: Agent survived [16] steps with reward 6.022526144981384
step 10371: Agent survived [15] steps with reward 5.209005653858185
step 10375: Agent survived [14] steps with reward 3.738907217979431
step 10379: Agent survived [17] steps with reward 5.069227039813995
step 10384: Agent survived [23] steps with reward 7.603686332702637
step 10393: Agent survived [74] steps with reward 43.21436357498169
step 10397: Agent survived [14] steps with reward 3.8866844177246094
step 10401: Agent survived [14] steps with reward 3.7936169505119324
step 10406: Agent survived [26] steps with reward 12.35555499792099
step 10413: Agent survived [

step 11029: Agent survived [152] steps with reward 107.81331199407578
loss = 535.84784
step 11034: Agent survived [23] steps with reward 14.214949071407318
step 11044: Agent survived [88] steps with reward 55.36434984207153
step 11061: Agent survived [168] steps with reward 121.02917033433914
step 11067: Agent survived [47] steps with reward 28.15371733903885
step 11071: Agent survived [14] steps with reward 4.030019223690033
step 11075: Agent survived [14] steps with reward 3.8426624536514282
step 11079: Agent survived [14] steps with reward 3.7436310052871704
loss = 656.27765
step 11092: Agent survived [113] steps with reward 82.1625748872757
step 11104: Agent survived [113] steps with reward 62.89919865131378
step 11111: Agent survived [50] steps with reward 36.600041687488556
step 11126: Agent survived [152] steps with reward 109.9262530207634
step 11130: Agent survived [8] steps with reward 2.5292348861694336
step 11134: Agent survived [14] steps with reward 7.857147932052612
step

step 11687: Agent survived [50] steps with reward 36.214527010917664
step 11692: Agent survived [26] steps with reward 17.6370667219162
step 11696: Agent survived [14] steps with reward 7.823748350143433
step 11700: Agent survived [14] steps with reward 7.765138030052185
loss = 1364.1931
step 11704: Agent survived [14] steps with reward 7.617347002029419
step 11708: Agent survived [14] steps with reward 7.860889434814453
step 11712: Agent survived [14] steps with reward 7.850487470626831
step 11720: Agent survived [62] steps with reward 45.973244190216064
step 11724: Agent survived [14] steps with reward 7.967731595039368
step 11733: Agent survived [74] steps with reward 53.9947395324707
step 11738: Agent survived [26] steps with reward 17.041646420955658
step 11742: Agent survived [14] steps with reward 7.7738213539123535
step 11746: Agent survived [14] steps with reward 7.84186863899231
step 11750: Agent survived [14] steps with reward 7.7761722803115845
step 11755: Agent survived [2

step 12302: Agent survived [14] steps with reward 7.8092252016067505
step 12306: Agent survived [14] steps with reward 7.7997589111328125
step 12313: Agent survived [50] steps with reward 33.36633378267288
step 12318: Agent survived [26] steps with reward 15.347649276256561
loss = 1066.7114
step 12322: Agent survived [14] steps with reward 7.836466073989868
step 12326: Agent survived [14] steps with reward 7.759990453720093
step 12332: Agent survived [38] steps with reward 26.81336623430252
step 12341: Agent survived [74] steps with reward 55.421833992004395
step 12347: Agent survived [38] steps with reward 26.040399134159088
step 12351: Agent survived [14] steps with reward 7.873682260513306
step 12355: Agent survived [14] steps with reward 7.829426169395447
step 12359: Agent survived [14] steps with reward 7.81649112701416
step 12364: Agent survived [26] steps with reward 17.634669303894043
step 12368: Agent survived [14] steps with reward 7.869751453399658
step 12372: Agent survived

step 12875: Agent survived [14] steps with reward 7.602222800254822
loss = 810.1386
step 12880: Agent survived [26] steps with reward 17.400960087776184
step 12884: Agent survived [14] steps with reward 7.851971983909607
step 12888: Agent survived [14] steps with reward 7.8314818143844604
step 12894: Agent survived [38] steps with reward 26.570891320705414
step 12899: Agent survived [26] steps with reward 17.670825242996216
step 12904: Agent survived [26] steps with reward 17.617069959640503
step 12909: Agent survived [26] steps with reward 16.04308170080185
step 12913: Agent survived [14] steps with reward 7.796104311943054
step 12920: Agent survived [50] steps with reward 36.160295367240906
step 12925: Agent survived [26] steps with reward 17.52547287940979
step 12929: Agent survived [14] steps with reward 7.767491817474365
loss = 1033.6771
step 12937: Agent survived [62] steps with reward 46.7046924829483
step 12943: Agent survived [38] steps with reward 24.968608796596527
step 1295

step 13462: Agent survived [38] steps with reward 23.56296694278717
step 13468: Agent survived [38] steps with reward 21.93457818031311
step 13473: Agent survived [26] steps with reward 15.807214617729187
step 13480: Agent survived [50] steps with reward 28.955456733703613
step 13487: Agent survived [49] steps with reward 32.988173961639404
step 13492: Agent survived [27] steps with reward 16.10112428665161
loss = 1438.7876
step 13504: Agent survived [110] steps with reward 62.93659049272537
step 13509: Agent survived [26] steps with reward 14.746331453323364
step 13517: Agent survived [62] steps with reward 39.061056673526764
step 13523: Agent survived [38] steps with reward 24.054488480091095
step 13530: Agent survived [50] steps with reward 32.60665720701218
step 13534: Agent survived [14] steps with reward 5.48210871219635
step 13538: Agent survived [14] steps with reward 5.4091339111328125
step 13543: Agent survived [26] steps with reward 12.437887191772461
step 13547: Agent survi

step 14036: Agent survived [38] steps with reward 25.341769695281982
step 14043: Agent survived [50] steps with reward 29.550944089889526
step 14049: Agent survived [38] steps with reward 25.395602703094482
step 14055: Agent survived [43] steps with reward 25.452088117599487
loss = 61.87299
step 14059: Agent survived [9] steps with reward 2.928041100502014
step 14063: Agent survived [14] steps with reward 7.020950078964233
step 14070: Agent survived [50] steps with reward 29.214869678020477
step 14074: Agent survived [14] steps with reward 6.946345567703247
step 14079: Agent survived [26] steps with reward 16.283430457115173
step 14084: Agent survived [26] steps with reward 15.901649713516235
step 14088: Agent survived [14] steps with reward 7.023106932640076
step 14092: Agent survived [14] steps with reward 7.121631026268005
step 14098: Agent survived [38] steps with reward 25.359317183494568
step 14103: Agent survived [26] steps with reward 15.752997756004333
step 14107: Agent surviv

step 14603: Agent survived [29] steps with reward 18.69521141052246
step 14607: Agent survived [14] steps with reward 6.9567259550094604
step 14611: Agent survived [14] steps with reward 6.874637842178345
loss = 284.28156
step 14617: Agent survived [38] steps with reward 23.94885241985321
step 14621: Agent survived [14] steps with reward 7.84010374546051
step 14626: Agent survived [26] steps with reward 17.34382724761963
step 14630: Agent survived [14] steps with reward 7.6793681383132935
step 14636: Agent survived [38] steps with reward 27.06743586063385
step 14640: Agent survived [14] steps with reward 7.621377229690552
step 14644: Agent survived [14] steps with reward 7.726631045341492
step 14649: Agent survived [23] steps with reward 16.674485564231873
step 14653: Agent survived [17] steps with reward 10.591197729110718
step 14659: Agent survived [38] steps with reward 27.331135153770447
step 14663: Agent survived [14] steps with reward 7.627694368362427
step 14668: Agent survived 

step 15177: Agent survived [14] steps with reward 7.764972448348999
step 15182: Agent survived [26] steps with reward 17.562559843063354
step 15186: Agent survived [14] steps with reward 7.786218643188477
step 15191: Agent survived [26] steps with reward 16.668022632598877
step 15196: Agent survived [26] steps with reward 17.539554953575134
step 15210: Agent survived [134] steps with reward 98.04309648275375
step 15214: Agent survived [14] steps with reward 7.809442400932312
step 15220: Agent survived [38] steps with reward 24.498033046722412
step 15224: Agent survived [14] steps with reward 7.602436542510986
step 15228: Agent survived [14] steps with reward 7.74468469619751
loss = 11.789568
step 15233: Agent survived [26] steps with reward 16.635192215442657
step 15237: Agent survived [14] steps with reward 7.660014986991882
step 15242: Agent survived [26] steps with reward 15.742449343204498
step 15246: Agent survived [14] steps with reward 7.645749092102051
step 15253: Agent survive

In [8]:
np.insert(np.array([True, True, True]), 0, False, axis=0)

array([False,  True,  True,  True])

In [None]:
from grid2op import make
from grid2op.Runner import Runner
from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward

In [None]:
nb_episode = 10 # number of episodes to evaluate
log_path = './logs-evals'
nb_process = 1 # number of cores to use
max_iter = 150 # maximum number of steps per scenario
verbose = True
save_gif = False

In [None]:
env_name = 'l2rpn_wcci_2020'
env = make(env_name, reward_class=L2RPNSandBoxScore,
           other_rewards={
               "reward": L2RPNReward
           })

agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
with open(os.path.join(data_dir, 'param.json'), 'r', encoding='utf-8') as f:
    param = json.load(f)

state_mean = torch.load(os.path.join(data_dir, 'mean.pt'), map_location=param['device']).cpu()
state_std = torch.load(os.path.join(data_dir, 'std.pt'), map_location=param['device']).cpu()
state_std = state_std.masked_fill(state_std<1e-5, 1.)
state_mean[0, sum(env.observation_space.shape[:20]):] = 0
state_std[0, sum(env.observation_space.shape[:20]):] = 1
agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)
    
runner_params = env.get_params_for_runner()
runner_params["verbose"] = False
runner = Runner(**runner_params, agentClass=None, agentInstance=agent)
    
res = runner.run(path_save=log_path, nb_episode=nb_episode, nb_process=nb_process, max_iter=150)
if verbose:
    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        msg_tmp = "chronics at: {}".format(chron_name)
        msg_tmp += "\ttotal reward: {:.6f}".format(cum_reward)
        msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step,
                                                        max_ts)
        print(msg_tmp)

if save_gif:
    save_log_gif(log_path, res)