In [1]:
import os
import json
import math
import numpy as np
import tensorflow as tf
import torch

import grid2op
from d3qn.adversary import D3QN_Opponent
from d3qn.adversary_kaist_state import D3QN_Kaist_State_Opponent
from grid2op.Agent import DoNothingAgent
from grid2op.Action import TopologyChangeAndDispatchAction
from grid2op.Reward import CombinedScaledReward, L2RPNSandBoxScore, L2RPNReward, GameplayReward
from l2rpn_baselines.DoubleDuelingDQN.DoubleDuelingDQNConfig import DoubleDuelingDQNConfig as cfg

from kaist_agent.Kaist import Kaist

In [5]:
MAX_TIMESTEP = 7 * 288

def train_adversary(env, agent, opponent, num_pre_training_steps, n_iter, save_path, log_path):
    # Make sure we can fill the experience buffer
    if num_pre_training_steps < opponent.batch_size * opponent.num_frames:
        num_pre_training_steps = opponent.batch_size * opponent.num_frames
        
    # Loop vars
    num_training_steps = n_iter
    num_steps = num_pre_training_steps + num_training_steps
    step = 0
    alive_steps = 0
    total_reward = 0
    done = True
    print(f"Total number of steps: {num_steps}")

    # Create file system related vars
    logpath = os.path.join(log_path, opponent.name)
    os.makedirs(save_path, exist_ok=True)
    modelpath = os.path.join(save_path, opponent.name + ".h5")
    opponent.tf_writer = tf.summary.create_file_writer(logpath, name=opponent.name)
    opponent._save_hyperparameters(save_path, env, num_steps)
    
    while step < num_steps:
        # Init first time or new episode
        if done:
            new_obs = env.reset() # This shouldn't raise
            agent.reset(new_obs)
            opponent.reset(new_obs)
            done = False
        if cfg.VERBOSE and step % 1000 == 0:
            print("Step [{}] -- Random [{}]".format(step, opponent.epsilon))

        # Save current observation to stacking buffer
        opponent._save_current_frame(opponent.state)

        # Execute attack if allowed
        if step <= num_pre_training_steps:
            opponent.remaining_time = 0
            attack, a = opponent._do_nothing, 0
        else:
            attack, a = opponent.attack(new_obs)

        if a != 0:
#             print(f'ATTACK step {step}: disconnected {a}')
            attack_obs, opp_reward, done, info = env.step(attack)
            if info["is_illegal"] or info["is_ambiguous"] or \
               info["is_dispatching_illegal"] or info["is_illegal_reco"]:
                if cfg.VERBOSE:
                    print(attack, info)
            new_obs = attack_obs
            opponent.tell_attack_continues(None, None, None, None)

        while opponent.remaining_time >= 0 and not done:
            new_obs.time_before_cooldown_line[opponent.attack_line] = opponent.remaining_time
            response = agent.act(new_obs, None, None)
            new_obs, reward, done, info = env.step(response)
            opponent.remaining_time -= 1
            total_reward += reward
            alive_steps += 1
        
        # Save new observation to stacking buffer
        new_state = opponent.convert_obs(new_obs)
        opponent._save_next_frame(new_state)

        # Save to experience buffer
        if len(opponent.frames2) == opponent.num_frames:
            opponent.per_buffer.add(np.array(opponent.frames),
                                a, -1 * reward,
                                np.array(opponent.frames2),
                                opponent.done)


        # Perform training when we have enough experience in buffer
        if step >= num_pre_training_steps:
            training_step = step - num_pre_training_steps
            # Decay chance of random action
            opponent.epsilon = opponent._adaptive_epsilon_decay(training_step)

            # Perform training at given frequency
            if step % cfg.UPDATE_FREQ == 0 and \
               len(opponent.per_buffer) >= opponent.batch_size:
                # Perform training
                opponent._batch_train(training_step, step)

                if cfg.UPDATE_TARGET_SOFT_TAU > 0.0:
                    tau = cfg.UPDATE_TARGET_SOFT_TAU
                    # Update target network towards primary network
                    opponent.policy_net.update_target_soft(opponent.target_net.model, tau)

            # Every UPDATE_TARGET_HARD_FREQ trainings, update target completely
            if cfg.UPDATE_TARGET_HARD_FREQ > 0 and \
               step % (cfg.UPDATE_FREQ * cfg.UPDATE_TARGET_HARD_FREQ) == 0:
                opponent.policy_net.update_target_hard(opponent.target_net.model)
        
        if done:
            opponent.epoch_rewards.append(-1 * total_reward)
            opponent.epoch_alive.append(alive_steps)
            if cfg.VERBOSE and step > num_pre_training_steps:
                print("step {}: Agent survived [{}] steps with reward {}".format(step, alive_steps, total_reward))
            alive_steps = 0
            total_reward = 0         
        else:
            alive_steps += 1
            
        ######## After Each Step #######
        if step > 0 and step % 2000 == 0: # save network every 5000 iters
            opponent.save(modelpath)
        step += 1
        # Make new obs the current obs
        opponent.obs = new_obs
        opponent.state = new_state

    # Save model after all steps
    opponent.save(modelpath)

In [None]:
env_name = 'l2rpn_wcci_2020'
env = grid2op.make(env_name, reward_class=CombinedScaledReward)

# Agent 
agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
with open(os.path.join(data_dir, 'param.json'), 'r', encoding='utf-8') as f:
    param = json.load(f)
print(param)
state_mean = torch.load(os.path.join(data_dir, 'mean.pt'), map_location=param['device']).cpu()
state_std = torch.load(os.path.join(data_dir, 'std.pt'), map_location=param['device']).cpu()
state_std = state_std.masked_fill(state_std<1e-5, 1.)
state_mean[0, sum(env.observation_space.shape[:20]):] = 0
state_std[0, sum(env.observation_space.shape[:20]):] = 1
agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)

# Opponent 
opponent_name = "D3QN_kaist"
num_pre_training_steps = 256
learning_rate = 5e-5
initial_epsilon = 0.99
final_epsilon = 0.01
decay_epsilon = 2000
attack_period = 20
lines = ['0_4_2', '10_11_11', '11_12_13', '12_13_14', '12_16_20', 
            '13_14_15', '13_15_16', '14_16_17', '14_35_53', '15_16_21', 
            '16_17_22', '16_18_23', '16_21_27', '16_21_28', '16_33_48', 
            '16_33_49', '16_35_54', '17_24_33', '18_19_24', '18_25_35', 
            '19_20_25', '1_10_12', '1_3_3', '1_4_4', '20_21_26', 
            '21_22_29', '21_23_30', '21_26_36', '22_23_31', '22_26_39', 
            '23_24_32', '23_25_34', '23_26_37', '23_26_38', '26_27_40', 
            '26_28_41', '26_30_56', '27_28_42', '27_29_43', '28_29_44', 
            '28_31_57', '29_33_50', '29_34_51', '2_3_0', '2_4_1', 
            '30_31_45', '31_32_47', '32_33_58', '33_34_52', '4_5_55', 
            '4_6_5', '4_7_6', '5_32_46', '6_7_7', '7_8_8', 
            '7_9_9', '8_9_10', '9_16_18', '9_16_19']

opponent = D3QN_Opponent(env.action_space, env.observation_space, lines_attacked=lines, attack_period=attack_period,
            name=opponent_name, is_training=True, learning_rate=learning_rate,
            initial_epsilon=initial_epsilon, final_epsilon=final_epsilon, decay_epsilon=decay_epsilon)

In [10]:
# Training
n_iter = 5000
# Register custom reward for training
cr = env._reward_helper.template_reward
#cr.addReward("overflow", CloseToOverflowReward(), 1.0)
cr.addReward("game", GameplayReward(), 1.0)
#cr.addReward("recolines", LinesReconnectedReward(), 1.0)
cr.addReward("l2rpn", L2RPNReward(), 2.0/float(env.n_line))
# Initialize custom rewards
cr.initialize(env)
# Set reward range to something managable
cr.set_range(-1.0, 1.0)

save_path = "kaist_agent_D3QN_opponent_{}_{}".format(attack_period, n_iter)
log_path="tf_logs_D3QN"

train_adversary(env, agent, opponent, num_pre_training_steps, n_iter, save_path, log_path)

Total number of steps: 5256
Step [0] -- Random [0.99]
step 266: Agent survived [622] steps with reward 305.9651598930359
step 272: Agent survived [41] steps with reward 25.828663289546967
step 278: Agent survived [44] steps with reward 27.575585186481476
loss = 109489.51
step 287: Agent survived [68] steps with reward 43.43529826402664
step 302: Agent survived [149] steps with reward 91.74529415369034
step 313: Agent survived [95] steps with reward 67.10616558790207
step 318: Agent survived [23] steps with reward 16.47074830532074
step 329: Agent survived [104] steps with reward 74.5729193687439
loss = 43257.875
step 337: Agent survived [59] steps with reward 42.027507066726685
step 347: Agent survived [83] steps with reward 55.785299479961395
step 360: Agent survived [125] steps with reward 85.89805418252945
step 370: Agent survived [83] steps with reward 61.21436274051666
step 380: Agent survived [86] steps with reward 55.96154886484146
step 386: Agent survived [41] steps with reward

step 1124: Agent survived [11] steps with reward 6.774989366531372
step 1146: Agent survived [236] steps with reward 166.80284082889557
step 1153: Agent survived [47] steps with reward 32.25837832689285
step 1160: Agent survived [47] steps with reward 32.12467420101166
step 1168: Agent survived [62] steps with reward 45.271926164627075
loss = 5933.5986
step 1180: Agent survived [116] steps with reward 85.06474888324738
step 1186: Agent survived [32] steps with reward 21.31094402074814
step 1191: Agent survived [29] steps with reward 18.872675716876984
step 1209: Agent survived [182] steps with reward 131.97413992881775
step 1213: Agent survived [14] steps with reward 6.118500232696533
step 1218: Agent survived [26] steps with reward 14.799134612083435
step 1228: Agent survived [86] steps with reward 62.307415187358856
loss = 13107.638
step 1234: Agent survived [38] steps with reward 26.613938808441162
step 1242: Agent survived [62] steps with reward 43.31673884391785
step 1247: Agent s

step 2057: Agent survived [119] steps with reward 92.7317344546318
step 2068: Agent survived [101] steps with reward 73.76946115493774
loss = 34156.945
step 2074: Agent survived [35] steps with reward 25.109918475151062
step 2083: Agent survived [74] steps with reward 57.56542372703552
step 2094: Agent survived [101] steps with reward 72.20702522993088
step 2099: Agent survived [23] steps with reward 13.899448573589325
step 2108: Agent survived [77] steps with reward 52.176638305187225
step 2116: Agent survived [62] steps with reward 42.98801392316818
loss = 18487.361
step 2130: Agent survived [134] steps with reward 100.60043799877167
step 2141: Agent survived [98] steps with reward 72.81356489658356
step 2149: Agent survived [59] steps with reward 44.62063026428223
step 2171: Agent survived [233] steps with reward 169.5452790260315
loss = 22827.75
step 2186: Agent survived [146] steps with reward 109.46653825044632
step 2190: Agent survived [14] steps with reward 7.380891799926758
st

step 3128: Agent survived [26] steps with reward 16.000807523727417
loss = 9350.234
step 3138: Agent survived [86] steps with reward 61.45883822441101
step 3142: Agent survived [14] steps with reward 5.959876656532288
step 3151: Agent survived [74] steps with reward 55.20153284072876
step 3162: Agent survived [98] steps with reward 70.09688293933868
step 3170: Agent survived [62] steps with reward 39.8563197851181
step 3179: Agent survived [71] steps with reward 51.99316471815109
step 3189: Agent survived [89] steps with reward 64.47385013103485
loss = 9232.734
step 3198: Agent survived [74] steps with reward 52.73938947916031
step 3218: Agent survived [203] steps with reward 146.93529379367828
step 3224: Agent survived [41] steps with reward 29.38334596157074
step 3229: Agent survived [23] steps with reward 16.131144285202026
step 3237: Agent survived [65] steps with reward 45.44643718004227
step 3247: Agent survived [86] steps with reward 63.619648575782776
loss = 8446.254
step 3252:

step 4085: Agent survived [47] steps with reward 32.88774049282074
loss = 8290.16
step 4096: Agent survived [98] steps with reward 71.51432198286057
step 4108: Agent survived [113] steps with reward 82.7127189040184
step 4122: Agent survived [131] steps with reward 96.03501015901566
step 4130: Agent survived [59] steps with reward 43.0027619600296
step 4137: Agent survived [59] steps with reward 39.96928060054779
loss = 5910.961
step 4145: Agent survived [56] steps with reward 39.191299736499786
step 4150: Agent survived [26] steps with reward 17.006691694259644
step 4165: Agent survived [143] steps with reward 101.86745148897171
step 4172: Agent survived [53] steps with reward 36.90305668115616
step 4179: Agent survived [50] steps with reward 31.474450409412384
step 4185: Agent survived [38] steps with reward 24.02320009469986
step 4189: Agent survived [14] steps with reward 7.4701114892959595
loss = 2857.7742
step 4204: Agent survived [153] steps with reward 104.26078885793686
step 4

step 4887: Agent survived [29] steps with reward 17.47747230529785
step 4891: Agent survived [15] steps with reward 6.047279417514801
step 4896: Agent survived [22] steps with reward 14.496838927268982
step 4900: Agent survived [14] steps with reward 8.51778507232666
step 4906: Agent survived [38] steps with reward 21.525287330150604
step 4911: Agent survived [26] steps with reward 15.73215264081955
step 4917: Agent survived [38] steps with reward 25.544349551200867
step 4922: Agent survived [26] steps with reward 17.757930517196655
loss = 57.7219
step 4929: Agent survived [59] steps with reward 32.85921984910965
step 4935: Agent survived [32] steps with reward 16.76658171415329
step 4945: Agent survived [92] steps with reward 53.086640655994415
step 4951: Agent survived [29] steps with reward 16.098089039325714
step 4955: Agent survived [14] steps with reward 8.458327770233154
step 4960: Agent survived [29] steps with reward 13.203466534614563
step 4965: Agent survived [23] steps with

## Train D3QN with KAIST state

In [6]:
env_name = 'l2rpn_wcci_2020'
env = grid2op.make(env_name, reward_class=CombinedScaledReward)

# Agent 
agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
with open(os.path.join(data_dir, 'param.json'), 'r', encoding='utf-8') as f:
    param = json.load(f)
print(param)
state_mean = torch.load(os.path.join(data_dir, 'mean.pt'), map_location=param['device']).cpu()
state_std = torch.load(os.path.join(data_dir, 'std.pt'), map_location=param['device']).cpu()
state_std = state_std.masked_fill(state_std<1e-5, 1.)
state_mean[0, sum(env.observation_space.shape[:20]):] = 0
state_std[0, sum(env.observation_space.shape[:20]):] = 1
agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)

# Opponent 
opponent_name = "D3QN_kaist_state"
num_pre_training_steps = 256
learning_rate = 5e-5
initial_epsilon = 0.99
final_epsilon = 0.01
decay_epsilon = 15000
attack_period = 20
lines = ['0_4_2', '10_11_11', '11_12_13', '12_13_14', '12_16_20', 
            '13_14_15', '13_15_16', '14_16_17', '14_35_53', '15_16_21', 
            '16_17_22', '16_18_23', '16_21_27', '16_21_28', '16_33_48', 
            '16_33_49', '16_35_54', '17_24_33', '18_19_24', '18_25_35', 
            '19_20_25', '1_10_12', '1_3_3', '1_4_4', '20_21_26', 
            '21_22_29', '21_23_30', '21_26_36', '22_23_31', '22_26_39', 
            '23_24_32', '23_25_34', '23_26_37', '23_26_38', '26_27_40', 
            '26_28_41', '26_30_56', '27_28_42', '27_29_43', '28_29_44', 
            '28_31_57', '29_33_50', '29_34_51', '2_3_0', '2_4_1', 
            '30_31_45', '31_32_47', '32_33_58', '33_34_52', '4_5_55', 
            '4_6_5', '4_7_6', '5_32_46', '6_7_7', '7_8_8', 
            '7_9_9', '8_9_10', '9_16_18', '9_16_19']

opponent = D3QN_Kaist_State_Opponent(env, state_mean, state_std,
                         lines_attacked=lines, attack_period=attack_period,
                         name=opponent_name, is_training=True, learning_rate=learning_rate,
                         initial_epsilon=initial_epsilon, final_epsilon=final_epsilon, decay_epsilon=decay_epsilon)

{'head_number': 8, 'n_history': 12, 'state_dim': 128, 'dropout': 0.0, 'sim_trial': 15, 'threshold': 0.35, 'max_low_len': 19, 'danger': 0.9, 'mask': 3, 'mask_hi': 19, 'use_order': True, 'device': 'cpu'}
O: 72 S: 128 A: 108 (19)
['2_3_0' '2_4_1' '0_4_2' '1_3_3' '1_4_4' '4_6_5' '4_7_6' '6_7_7' '7_8_8'
 '7_9_9' '8_9_10' '10_11_11' '1_10_12' '11_12_13' '12_13_14' '13_14_15'
 '13_15_16' '14_16_17' '9_16_18' '9_16_19' '12_16_20' '15_16_21'
 '16_17_22' '16_18_23' '18_19_24' '19_20_25' '20_21_26' '16_21_27'
 '16_21_28' '21_22_29' '21_23_30' '22_23_31' '23_24_32' '17_24_33'
 '23_25_34' '18_25_35' '21_26_36' '23_26_37' '23_26_38' '22_26_39'
 '26_27_40' '26_28_41' '27_28_42' '27_29_43' '28_29_44' '30_31_45'
 '5_32_46' '31_32_47' '16_33_48' '16_33_49' '29_33_50' '29_34_51'
 '33_34_52' '14_35_53' '16_35_54' '4_5_55' '26_30_56' '28_31_57'
 '32_33_58']


In [7]:
# Training
n_iter = 15000
# Register custom reward for training
cr = env._reward_helper.template_reward
#cr.addReward("overflow", CloseToOverflowReward(), 1.0)
cr.addReward("game", GameplayReward(), 1.0)
#cr.addReward("recolines", LinesReconnectedReward(), 1.0)
cr.addReward("l2rpn", L2RPNReward(), 2.0/float(env.n_line))
# Initialize custom rewards
cr.initialize(env)
# Set reward range to something managable
cr.set_range(-1.0, 1.0)

save_path = "kaist_agent_D3QN_kaist_stae_opponent_{}_{}".format(attack_period, n_iter)
log_path="tf_logs_D3QN"

train_adversary(env, agent, opponent, num_pre_training_steps, n_iter, save_path, log_path)

Total number of steps: 15256
Step [0] -- Random [0.99]
step 266: Agent survived [628] steps with reward 306.78976184129715
loss = 2.144536
step 281: Agent survived [140] steps with reward 103.23902690410614
step 299: Agent survived [182] steps with reward 128.21880465745926
step 304: Agent survived [29] steps with reward 17.328566431999207
step 321: Agent survived [167] steps with reward 119.78530275821686
step 329: Agent survived [67] steps with reward 36.11355358362198
loss = 0.018760484
step 339: Agent survived [84] steps with reward 57.23090785741806
step 358: Agent survived [191] steps with reward 137.79481917619705
step 373: Agent survived [149] steps with reward 109.28870618343353
step 384: Agent survived [95] steps with reward 67.33531028032303
loss = 0.017672557
step 394: Agent survived [89] steps with reward 63.0978439450264
step 409: Agent survived [143] steps with reward 107.74038904905319
step 422: Agent survived [125] steps with reward 91.78135859966278
step 427: Agent su

step 1408: Agent survived [50] steps with reward 35.59557771682739
step 1419: Agent survived [98] steps with reward 74.54118645191193
step 1434: Agent survived [143] steps with reward 108.76971620321274
step 1446: Agent survived [113] steps with reward 88.1163335442543
loss = 0.0093657635
step 1465: Agent survived [200] steps with reward 148.87418484687805
step 1474: Agent survived [65] steps with reward 47.555749237537384
step 1481: Agent survived [50] steps with reward 37.33189606666565
step 1507: Agent survived [278] steps with reward 210.87873375415802
loss = 0.003482356
step 1516: Agent survived [77] steps with reward 49.47469049692154
step 1522: Agent survived [38] steps with reward 26.02262783050537
step 1533: Agent survived [98] steps with reward 67.34186613559723
step 1543: Agent survived [83] steps with reward 62.920291781425476
step 1552: Agent survived [74] steps with reward 56.43739074468613
step 1565: Agent survived [122] steps with reward 88.26124995946884
loss = 0.00168

step 2441: Agent survived [134] steps with reward 93.8367732167244
step 2450: Agent survived [74] steps with reward 48.797177731990814
step 2461: Agent survived [98] steps with reward 70.79749727249146
loss = 0.00043472316
step 2465: Agent survived [14] steps with reward 7.5480815172195435
step 2477: Agent survived [110] steps with reward 80.23000383377075
step 2488: Agent survived [98] steps with reward 70.64849925041199
step 2497: Agent survived [71] steps with reward 53.16875696182251
step 2505: Agent survived [65] steps with reward 48.05711615085602
step 2515: Agent survived [86] steps with reward 61.43076300621033
loss = 0.0005002683
step 2521: Agent survived [38] steps with reward 25.73386687040329
step 2525: Agent survived [14] steps with reward 7.547750115394592
step 2529: Agent survived [14] steps with reward 7.445904493331909
step 2541: Agent survived [110] steps with reward 77.41067630052567
step 2548: Agent survived [47] steps with reward 34.6699880361557
step 2553: Agent s

step 3312: Agent survived [35] steps with reward 24.147887408733368
step 3323: Agent survived [101] steps with reward 75.046466588974
step 3329: Agent survived [38] steps with reward 23.043885409832
step 3333: Agent survived [14] steps with reward 7.536826014518738
step 3337: Agent survived [14] steps with reward 7.463278770446777
step 3342: Agent survived [26] steps with reward 16.007867991924286
step 3349: Agent survived [50] steps with reward 33.84529113769531
step 3358: Agent survived [77] steps with reward 53.91744393110275
loss = 0.00038123917
step 3362: Agent survived [14] steps with reward 5.227520644664764
step 3366: Agent survived [11] steps with reward 4.882351636886597
step 3370: Agent survived [14] steps with reward 7.591624736785889
step 3374: Agent survived [17] steps with reward 7.63068014383316
step 3381: Agent survived [47] steps with reward 28.917149424552917
step 3385: Agent survived [14] steps with reward 7.486613869667053
step 3390: Agent survived [26] steps with 

step 4070: Agent survived [74] steps with reward 50.3182612657547
step 4074: Agent survived [14] steps with reward 6.038317918777466
step 4083: Agent survived [71] steps with reward 54.56876051425934
loss = 0.00046973486
step 4088: Agent survived [29] steps with reward 19.637484431266785
step 4097: Agent survived [74] steps with reward 50.84316521883011
step 4106: Agent survived [74] steps with reward 54.42023706436157
step 4110: Agent survived [14] steps with reward 7.619503498077393
step 4114: Agent survived [14] steps with reward 5.855401456356049
step 4123: Agent survived [74] steps with reward 46.91151285171509
step 4127: Agent survived [14] steps with reward 7.412525057792664
step 4132: Agent survived [26] steps with reward 16.702818036079407
step 4136: Agent survived [14] steps with reward 7.6442975997924805
loss = 0.00057322445
step 4144: Agent survived [59] steps with reward 41.19246792793274
step 4149: Agent survived [26] steps with reward 19.02650487422943
step 4153: Agent s

loss = 0.00054447685
step 4817: Agent survived [38] steps with reward 23.651764512062073
step 4821: Agent survived [14] steps with reward 6.775857448577881
step 4825: Agent survived [14] steps with reward 6.8820788860321045
step 4830: Agent survived [26] steps with reward 14.756244659423828
step 4836: Agent survived [38] steps with reward 23.85275423526764
step 4840: Agent survived [14] steps with reward 6.925741910934448
step 4845: Agent survived [26] steps with reward 14.061615645885468
step 4851: Agent survived [38] steps with reward 21.276918053627014
step 4855: Agent survived [17] steps with reward 5.460526645183563
step 4860: Agent survived [26] steps with reward 13.446434319019318
step 4864: Agent survived [11] steps with reward 3.7659895420074463
step 4871: Agent survived [50] steps with reward 31.59595561027527
loss = 0.0005168194
step 4875: Agent survived [14] steps with reward 6.8588457107543945
step 4879: Agent survived [14] steps with reward 5.640853941440582
step 4884: Ag

step 5517: Agent survived [53] steps with reward 33.417552053928375
step 5523: Agent survived [38] steps with reward 22.488377690315247
step 5530: Agent survived [50] steps with reward 31.08112120628357
step 5535: Agent survived [26] steps with reward 10.539412796497345
step 5539: Agent survived [14] steps with reward 5.455536425113678
loss = 0.00035425552
step 5546: Agent survived [50] steps with reward 31.637892723083496
step 5555: Agent survived [80] steps with reward 48.05590933561325
step 5565: Agent survived [80] steps with reward 53.340453922748566
step 5572: Agent survived [47] steps with reward 30.388909339904785
step 5577: Agent survived [29] steps with reward 18.141247868537903
step 5591: Agent survived [137] steps with reward 78.33709508180618
step 5595: Agent survived [11] steps with reward 4.32940137386322
step 5599: Agent survived [14] steps with reward 6.944630980491638
loss = 0.00033399145
step 5606: Agent survived [56] steps with reward 31.41740119457245
step 5612: Ag

step 6226: Agent survived [38] steps with reward 24.175249695777893
step 6230: Agent survived [11] steps with reward 6.224386215209961
step 6235: Agent survived [26] steps with reward 9.238051533699036
step 6240: Agent survived [29] steps with reward 14.759871006011963
step 6245: Agent survived [26] steps with reward 15.078324913978577
step 6249: Agent survived [14] steps with reward 6.69033682346344
step 6260: Agent survived [98] steps with reward 65.13210797309875
step 6264: Agent survived [14] steps with reward 6.596964001655579
step 6268: Agent survived [14] steps with reward 6.796921014785767
loss = 0.0003327087
step 6277: Agent survived [71] steps with reward 47.22135400772095
step 6281: Agent survived [17] steps with reward 9.121804237365723
step 6291: Agent survived [86] steps with reward 43.307093501091
step 6296: Agent survived [26] steps with reward 15.671719312667847
step 6308: Agent survived [111] steps with reward 62.824870467185974
step 6314: Agent survived [37] steps wi

step 6892: Agent survived [14] steps with reward 6.602653980255127
step 6899: Agent survived [50] steps with reward 31.560089826583862
step 6903: Agent survived [14] steps with reward 6.790333867073059
step 6908: Agent survived [26] steps with reward 15.45113480091095
step 6913: Agent survived [26] steps with reward 15.362082719802856
step 6919: Agent survived [35] steps with reward 23.686394929885864
step 6924: Agent survived [29] steps with reward 18.141093134880066
step 6928: Agent survived [14] steps with reward 6.470435380935669
step 6932: Agent survived [14] steps with reward 6.673765540122986
step 6938: Agent survived [38] steps with reward 20.417517066001892
step 6943: Agent survived [26] steps with reward 15.684109330177307
loss = 0.00027353107
step 6948: Agent survived [26] steps with reward 15.781245350837708
step 6953: Agent survived [23] steps with reward 14.723807692527771
step 6957: Agent survived [17] steps with reward 9.013265132904053
step 6964: Agent survived [50] st

step 7527: Agent survived [14] steps with reward 7.011268377304077
step 7531: Agent survived [14] steps with reward 6.714089632034302
step 7535: Agent survived [14] steps with reward 5.370862782001495
step 7540: Agent survived [26] steps with reward 14.574437975883484
step 7545: Agent survived [26] steps with reward 15.479186773300171
step 7549: Agent survived [14] steps with reward 6.615956783294678
step 7553: Agent survived [14] steps with reward 6.786870121955872
step 7557: Agent survived [14] steps with reward 6.89967668056488
loss = 0.00041836468
step 7562: Agent survived [26] steps with reward 13.784828841686249
step 7567: Agent survived [26] steps with reward 15.915059924125671
step 7574: Agent survived [50] steps with reward 24.59224557876587
step 7582: Agent survived [62] steps with reward 38.43019884824753
step 7586: Agent survived [20] steps with reward 6.3130717277526855
step 7590: Agent survived [8] steps with reward 2.0819480419158936
step 7597: Agent survived [50] steps 

step 8133: Agent survived [14] steps with reward 6.950374722480774
step 8138: Agent survived [26] steps with reward 15.08648431301117
step 8145: Agent survived [50] steps with reward 32.98977243900299
step 8151: Agent survived [38] steps with reward 21.02590376138687
step 8155: Agent survived [14] steps with reward 6.830858826637268
step 8160: Agent survived [26] steps with reward 9.08198893070221
step 8167: Agent survived [50] steps with reward 33.39562666416168
step 8171: Agent survived [14] steps with reward 6.894795179367065
step 8175: Agent survived [14] steps with reward 6.76761257648468
loss = 0.0003485937
step 8179: Agent survived [14] steps with reward 6.8563292026519775
step 8184: Agent survived [26] steps with reward 15.602257013320923
step 8191: Agent survived [50] steps with reward 28.148751258850098
step 8197: Agent survived [38] steps with reward 22.90096229314804
step 8202: Agent survived [26] steps with reward 15.494823098182678
step 8208: Agent survived [38] steps wit

loss = 0.0004338047
step 8798: Agent survived [50] steps with reward 31.903153479099274
step 8803: Agent survived [26] steps with reward 16.75493061542511
step 8808: Agent survived [26] steps with reward 16.74552571773529
step 8813: Agent survived [26] steps with reward 13.675024032592773
step 8824: Agent survived [98] steps with reward 74.78379893302917
step 8830: Agent survived [38] steps with reward 25.89929234981537
step 8835: Agent survived [26] steps with reward 16.161593914031982
step 8839: Agent survived [14] steps with reward 7.765281438827515
step 8844: Agent survived [26] steps with reward 17.338942050933838
loss = 0.00066087226
step 8851: Agent survived [50] steps with reward 36.741220235824585
step 8859: Agent survived [62] steps with reward 43.92353665828705
step 8864: Agent survived [26] steps with reward 13.621933281421661
step 8869: Agent survived [26] steps with reward 16.68951141834259
step 8876: Agent survived [50] steps with reward 34.749462485313416
step 8880: Age

step 9456: Agent survived [14] steps with reward 6.364038586616516
step 9463: Agent survived [50] steps with reward 34.10931700468063
loss = 0.00046727242
step 9467: Agent survived [14] steps with reward 6.2309505343437195
step 9471: Agent survived [11] steps with reward 7.1273698806762695
step 9476: Agent survived [29] steps with reward 19.5634902715683
step 9481: Agent survived [26] steps with reward 16.538050770759583
step 9485: Agent survived [14] steps with reward 6.318078815937042
step 9489: Agent survived [14] steps with reward 6.300321042537689
step 9494: Agent survived [26] steps with reward 15.993403434753418
step 9498: Agent survived [14] steps with reward 6.343953847885132
step 9509: Agent survived [98] steps with reward 74.95468604564667
step 9514: Agent survived [26] steps with reward 16.80920970439911
step 9519: Agent survived [26] steps with reward 13.755432784557343
loss = 0.0006318026
step 9527: Agent survived [62] steps with reward 42.740583181381226
step 9532: Agent

step 10112: Agent survived [14] steps with reward 6.3021820187568665
step 10116: Agent survived [14] steps with reward 6.268745005130768
step 10121: Agent survived [26] steps with reward 16.16375756263733
step 10125: Agent survived [14] steps with reward 6.208588898181915
step 10130: Agent survived [26] steps with reward 16.725287973880768
step 10134: Agent survived [14] steps with reward 6.293539822101593
loss = 0.00042425792
step 10138: Agent survived [14] steps with reward 6.293575286865234
step 10148: Agent survived [86] steps with reward 61.87559515237808
step 10152: Agent survived [14] steps with reward 6.226609408855438
step 10156: Agent survived [14] steps with reward 6.42058938741684
step 10160: Agent survived [14] steps with reward 6.270084321498871
step 10168: Agent survived [62] steps with reward 42.920554995536804
step 10176: Agent survived [62] steps with reward 46.68234992027283
step 10181: Agent survived [26] steps with reward 16.08075726032257
step 10185: Agent survive

step 10742: Agent survived [26] steps with reward 16.865082025527954
step 10748: Agent survived [38] steps with reward 27.038570165634155
loss = 0.00042188048
step 10753: Agent survived [26] steps with reward 15.302425622940063
step 10757: Agent survived [14] steps with reward 6.252996861934662
step 10762: Agent survived [26] steps with reward 17.56093180179596
step 10767: Agent survived [26] steps with reward 17.576429963111877
step 10771: Agent survived [14] steps with reward 6.375540792942047
step 10776: Agent survived [26] steps with reward 17.572596549987793
step 10785: Agent survived [74] steps with reward 52.37918049097061
step 10791: Agent survived [38] steps with reward 26.309446573257446
step 10797: Agent survived [38] steps with reward 26.679880917072296
step 10803: Agent survived [38] steps with reward 22.82905811071396
loss = 0.0004532659
step 10808: Agent survived [26] steps with reward 17.47369635105133
step 10814: Agent survived [38] steps with reward 26.37921452522278


loss = 0.00024338628
step 11315: Agent survived [14] steps with reward 7.071749329566956
step 11320: Agent survived [26] steps with reward 15.237569689750671
step 11327: Agent survived [50] steps with reward 29.931403398513794
step 11331: Agent survived [14] steps with reward 7.016615509986877
step 11340: Agent survived [74] steps with reward 49.68989133834839
step 11349: Agent survived [74] steps with reward 43.84779167175293
step 11353: Agent survived [14] steps with reward 5.483865916728973
step 11357: Agent survived [14] steps with reward 7.089580774307251
step 11361: Agent survived [14] steps with reward 7.059005379676819
loss = 0.00043224177
step 11373: Agent survived [110] steps with reward 71.66929292678833
step 11377: Agent survived [14] steps with reward 7.18994402885437
step 11382: Agent survived [26] steps with reward 15.483827233314514
step 11387: Agent survived [26] steps with reward 14.7327601313591
step 11392: Agent survived [26] steps with reward 16.32497251033783
step

step 11919: Agent survived [26] steps with reward 15.354679882526398
step 11923: Agent survived [14] steps with reward 5.467154383659363
loss = 0.00023675078
step 11930: Agent survived [50] steps with reward 30.449939370155334
step 11934: Agent survived [14] steps with reward 5.540538966655731
step 11938: Agent survived [14] steps with reward 5.669927775859833
step 11942: Agent survived [14] steps with reward 5.486658453941345
step 11946: Agent survived [14] steps with reward 5.50723522901535
step 11952: Agent survived [38] steps with reward 23.154027819633484
step 11956: Agent survived [14] steps with reward 5.6027169823646545
step 11962: Agent survived [41] steps with reward 26.529940366744995
step 11966: Agent survived [11] steps with reward 3.0542646646499634
step 11970: Agent survived [14] steps with reward 5.556347966194153
step 11974: Agent survived [14] steps with reward 5.553350865840912
step 11978: Agent survived [14] steps with reward 5.547545969486237
loss = 0.00030524217
s

loss = 0.00045498705
step 12492: Agent survived [26] steps with reward 17.230545043945312
step 12496: Agent survived [14] steps with reward 6.192424893379211
step 12501: Agent survived [26] steps with reward 15.13216882944107
step 12507: Agent survived [38] steps with reward 24.730697095394135
step 12513: Agent survived [43] steps with reward 25.866163849830627
step 12518: Agent survived [21] steps with reward 12.998260140419006
step 12522: Agent survived [14] steps with reward 7.6083056926727295
step 12526: Agent survived [14] steps with reward 7.723374962806702
step 12530: Agent survived [14] steps with reward 7.6924861669540405
step 12536: Agent survived [38] steps with reward 26.319042801856995
step 12540: Agent survived [14] steps with reward 6.067806720733643
loss = 0.00025091914
step 12545: Agent survived [26] steps with reward 15.860619902610779
step 12550: Agent survived [26] steps with reward 16.811230659484863
step 12555: Agent survived [26] steps with reward 16.668566942214

step 13058: Agent survived [14] steps with reward 7.654163837432861
step 13063: Agent survived [26] steps with reward 17.230465412139893
step 13069: Agent survived [38] steps with reward 26.426711440086365
step 13081: Agent survived [110] steps with reward 81.59591525793076
step 13085: Agent survived [14] steps with reward 6.190456986427307
step 13089: Agent survived [14] steps with reward 6.122609257698059
step 13093: Agent survived [14] steps with reward 6.135006487369537
step 13097: Agent survived [14] steps with reward 6.233956456184387
step 13101: Agent survived [14] steps with reward 6.14011013507843
loss = 0.00042619786
step 13106: Agent survived [26] steps with reward 17.25751495361328
step 13112: Agent survived [38] steps with reward 25.65942919254303
step 13118: Agent survived [35] steps with reward 24.9794282913208
step 13124: Agent survived [38] steps with reward 28.914255499839783
step 13131: Agent survived [53] steps with reward 39.02312874794006
step 13136: Agent survive

step 13647: Agent survived [38] steps with reward 23.320314288139343
step 13651: Agent survived [14] steps with reward 4.802192866802216
step 13655: Agent survived [14] steps with reward 4.724008500576019
step 13660: Agent survived [26] steps with reward 12.597178936004639
loss = 0.0004750301
step 13666: Agent survived [38] steps with reward 17.937501788139343
step 13670: Agent survived [14] steps with reward 6.293556571006775
step 13674: Agent survived [14] steps with reward 6.343873023986816
step 13679: Agent survived [26] steps with reward 11.029176652431488
step 13685: Agent survived [38] steps with reward 23.432115614414215
step 13689: Agent survived [14] steps with reward 5.38752818107605
step 13694: Agent survived [26] steps with reward 12.213940024375916
step 13700: Agent survived [38] steps with reward 22.049435675144196
step 13704: Agent survived [14] steps with reward 5.5673152804374695
step 13711: Agent survived [50] steps with reward 32.45471978187561
step 13716: Agent sur

step 14218: Agent survived [14] steps with reward 3.343205511569977
loss = 0.0006683622
step 14225: Agent survived [50] steps with reward 28.145444452762604
step 14229: Agent survived [11] steps with reward 3.8311988711357117
step 14234: Agent survived [26] steps with reward 13.194503545761108
step 14238: Agent survived [14] steps with reward 5.387081027030945
step 14242: Agent survived [14] steps with reward 4.654618084430695
step 14247: Agent survived [26] steps with reward 14.92142504453659
step 14252: Agent survived [29] steps with reward 15.688247323036194
step 14259: Agent survived [50] steps with reward 28.439760267734528
step 14264: Agent survived [26] steps with reward 11.887221097946167
step 14268: Agent survived [14] steps with reward 3.234805941581726
step 14273: Agent survived [26] steps with reward 12.389989972114563
step 14278: Agent survived [26] steps with reward 13.537203192710876
loss = 0.0004115476
step 14287: Agent survived [74] steps with reward 46.506890654563904

step 14782: Agent survived [23] steps with reward 15.493747234344482
loss = 0.00024981084
step 14787: Agent survived [29] steps with reward 17.585922420024872
step 14791: Agent survived [14] steps with reward 7.189226508140564
step 14795: Agent survived [14] steps with reward 7.102947235107422
step 14799: Agent survived [14] steps with reward 7.096124053001404
step 14803: Agent survived [14] steps with reward 7.212080359458923
step 14809: Agent survived [38] steps with reward 24.48932021856308
step 14813: Agent survived [14] steps with reward 7.089414715766907
step 14817: Agent survived [14] steps with reward 7.137540102005005
step 14822: Agent survived [26] steps with reward 14.6860893368721
step 14826: Agent survived [14] steps with reward 7.267446279525757
step 14831: Agent survived [26] steps with reward 16.221382975578308
step 14835: Agent survived [17] steps with reward 7.434911847114563
step 14839: Agent survived [11] steps with reward 4.805657744407654
loss = 0.00047134777
step

In [None]:
nb_episode = 10 # number of episodes to evaluate
log_path = './logs-evals'
nb_process = 1 # number of cores to use
max_iter = 150 # maximum number of steps per scenario
verbose = True
save_gif = False

In [None]:
env_name = 'l2rpn_wcci_2020'
env = make(env_name, reward_class=L2RPNSandBoxScore,
           other_rewards={
               "reward": L2RPNReward
           })

agent_name = "kaist"
data_dir = os.path.join('kaist_agent/data')
with open(os.path.join(data_dir, 'param.json'), 'r', encoding='utf-8') as f:
    param = json.load(f)

state_mean = torch.load(os.path.join(data_dir, 'mean.pt'), map_location=param['device']).cpu()
state_std = torch.load(os.path.join(data_dir, 'std.pt'), map_location=param['device']).cpu()
state_std = state_std.masked_fill(state_std<1e-5, 1.)
state_mean[0, sum(env.observation_space.shape[:20]):] = 0
state_std[0, sum(env.observation_space.shape[:20]):] = 1
agent = Kaist(env, state_mean, state_std, name=agent_name, **param)
agent.sim_trial = 0
agent.load_model(data_dir)
    
runner_params = env.get_params_for_runner()
runner_params["verbose"] = False
runner = Runner(**runner_params, agentClass=None, agentInstance=agent)
    
res = runner.run(path_save=log_path, nb_episode=nb_episode, nb_process=nb_process, max_iter=150)
if verbose:
    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        msg_tmp = "chronics at: {}".format(chron_name)
        msg_tmp += "\ttotal reward: {:.6f}".format(cum_reward)
        msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step,
                                                        max_ts)
        print(msg_tmp)

if save_gif:
    save_log_gif(log_path, res)

In [12]:
import torch

In [14]:
a = np.array([[1,2,3], [4,5,6]])

In [18]:
b = torch.FloatTensor(a).unsqueeze(0)

In [17]:
np.expand_dims(a, 0)

array([[[1, 2, 3],
        [4, 5, 6]]])

In [19]:
b.size(0)

1

In [22]:
b.numpy()

array([[[1., 2., 3.],
        [4., 5., 6.]]], dtype=float32)