In [108]:
import os
import json
import math
import numpy as np
import tensorflow as tf
import torch

import grid2op
from grid2op.Reward import *
from grid2op.Action import *
from grid2op.Runner import Runner
from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward
from grid2op.Agent import DoNothingAgent

%run d3qn.ipynb
%run opponent.ipynb

In [98]:
def train_d3qn_against_random_opponent(env, agent, opponent, num_pre_training_steps, n_iter,
                                       save_path, log_path):
    # Make sure we can fill the experience buffer
    if num_pre_training_steps < agent.batch_size * agent.num_frames:
        num_pre_training_steps = agent.batch_size * agent.num_frames
        
    # Loop vars
    num_training_steps = n_iter
    num_steps = num_pre_training_steps + num_training_steps
    step = 0
    alive_steps = 0
    total_reward, total_reward_opp = 0, 0
    agent.done = True
    print(f"Total number of steps: {num_steps}")

    # Create file system related vars
    logpath = os.path.join(log_path, agent.name)
    os.makedirs(save_path, exist_ok=True)
    modelpath = os.path.join(save_path, agent.name + ".h5")
    agent.tf_writer = tf.summary.create_file_writer(logpath, name=agent.name)
    agent._save_hyperparameters(save_path, env, num_steps)
    
    while step < num_steps:
        # Init first time or new episode
        if agent.done:
            new_obs = env.reset() # This shouldn't raise
            agent.reset(new_obs)
            opponent.reset()
        if cfg.VERBOSE and step % 1000 == 0:
            print("Step [{}] -- Random [{}]".format(step, agent.epsilon))

        # Save current observation to stacking buffer
        agent._save_current_frame(agent.state)

        # Choose an action
        if step <= num_pre_training_steps:
            a = agent.Qmain.random_move()
        elif np.random.rand(1) < agent.epsilon:
            a = agent.Qmain.random_move()
        elif len(agent.frames) < agent.num_frames:
            a = 0 # Do nothing
        else:
            a, _ = agent.Qmain.predict_move(np.array(agent.frames))

        # Convert it to a valid action
        act = agent.convert_act(a)
        # Execute action
        new_obs, reward, agent.done, info = env.step(act)
        new_state = agent.convert_obs(new_obs)
        if info["is_illegal"] or info["is_ambiguous"] or \
           info["is_dispatching_illegal"] or info["is_illegal_reco"]:
            if cfg.VERBOSE:
                print (a, info)
        total_reward += reward

        # Save new observation to stacking buffer
        agent._save_next_frame(new_state)

        # Save to experience buffer
        if len(agent.frames2) == agent.num_frames:
            agent.per_buffer.add(np.array(agent.frames),
                                a, reward,
                                np.array(agent.frames2),
                                agent.done)

        # Perform training when we have enough experience in buffer
        if step >= num_pre_training_steps:
            training_step = step - num_pre_training_steps
            # Decay chance of random action
            agent.epsilon = agent._adaptive_epsilon_decay(training_step)

            # Perform training at given frequency
            if step % cfg.UPDATE_FREQ == 0 and \
               len(agent.per_buffer) >= agent.batch_size:
                # Perform training
                agent._batch_train(training_step, step)

                if cfg.UPDATE_TARGET_SOFT_TAU > 0.0:
                    tau = cfg.UPDATE_TARGET_SOFT_TAU
                    # Update target network towards primary network
                    agent.Qmain.update_target_soft(agent.Qtarget.model, tau)

            # Every UPDATE_TARGET_HARD_FREQ trainings, update target completely
            if cfg.UPDATE_TARGET_HARD_FREQ > 0 and \
               step % (cfg.UPDATE_FREQ * cfg.UPDATE_TARGET_HARD_FREQ) == 0:
                agent.Qmain.update_target_hard(agent.Qtarget.model)
        
        ######## Opponent #########
        if not agent.done:
            attack = opponent.attack(new_obs)
            if attack is not None:
                print('ATTACK step {}: disconnected {}'.format(step, attack.as_dict()['set_line_status']['disconnected_id'][0]))
                attack_obs, reward_opp, agent.done, info = env.step(attack)
                if info["is_illegal"] or info["is_ambiguous"] or \
                   info["is_dispatching_illegal"] or info["is_illegal_reco"]:
                    if cfg.VERBOSE:
                        print(attack, info)
                total_reward_opp += reward_opp
                new_obs = attack_obs
                opponent.tell_attack_continues(None, None, None, None)
        
        if agent.done:
            agent.epoch_rewards.append(total_reward)
            agent.epoch_alive.append(alive_steps)
            if cfg.VERBOSE and step > num_pre_training_steps:
                print("step {}: Survived [{}] steps".format(step, alive_steps))
                print("Total reward [{}]".format(total_reward))
                print("Total reward opponent [{}]".format(total_reward_opp))
            alive_steps = 0
            total_reward = 0
            total_reward_opp = 0            
        else:
            alive_steps += 1
            
        ######## After Each Step #######
        if step > 0 and step % 2000 == 0: # save network every 5000 iters
            agent.save(modelpath)
        step += 1
        # Make new obs the current obs
        agent.obs = new_obs
        agent.state = new_state

    # Save model after all steps
    agent.save(modelpath)

In [99]:
# agent params
num_pre_training_steps = 256
learning_rate = 1e-4
initial_epsilon = 0.99
final_epsilon = 0.01
decay_epsilon = 20000
# opponent params
attack_period = 20 # 12 * 24
attack_lines = ['0_1_0', '0_4_1', '11_12_13', '12_13_14', '1_2_2', '1_3_3', '1_4_4', '2_3_5', '3_4_6', '3_6_15', '3_8_16', '4_5_17', '5_10_7', '5_11_8', '5_12_9', '6_7_18', '6_8_19', '8_13_11', '8_9_10', '9_10_12']

# training params
n_iter = 20000
env_name = "rte_case14_realistic"
# env = make(env_name, reward_class=IllegalBadReward)
env = make(env_name, reward_class=CombinedScaledReward)

# Register custom reward for training
cr = env._reward_helper.template_reward
#cr.addReward("overflow", CloseToOverflowReward(), 1.0)
cr.addReward("game", GameplayReward(), 1.0)
#cr.addReward("recolines", LinesReconnectedReward(), 1.0)
cr.addReward("l2rpn", L2RPNReward(), 2.0/float(env.n_line))
# Initialize custom rewards
cr.initialize(env)
# Set reward range to something managable
cr.set_range(-1.0, 1.0)

agent_name = "DDDQN"
save_path = "saved_agent_DDDQN_random_opponent_{}_{}".format(attack_period, n_iter)
log_path="tf_logs_DDDQN"

opponent_name = "Random"

agent = DoubleDuelingDQN(env.observation_space, env.action_space, name=agent_name,
                         is_training=True, learning_rate=learning_rate,
                         initial_epsilon=initial_epsilon, final_epsilon=final_epsilon, decay_epsilon=decay_epsilon)
opponent = RandomOpponent(env.observation_space, env.action_space, lines_attacked=attack_lines,
                          name=opponent_name, attack_period=attack_period)

train_d3qn_against_random_opponent(env, agent, opponent, num_pre_training_steps, n_iter,
                                   save_path, log_path)

Agent action size: 141
Total number of steps: 20256
Step [0] -- Random [0.99]
ATTACK step 1: disconnected 6
ATTACK step 91: disconnected 18
ATTACK step 196: disconnected 17
ATTACK step 225: disconnected 15
ATTACK step 240: disconnected 2
ATTACK step 245: disconnected 1
11 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
        True, False,  True, False, False,  True, False,  True,  True,
       False, False]), 'is_illegal': True, 'is_ambiguous': False, 'is_dispatching_illegal': False, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException IllegalAction IllegalAction('Powerline with ids [10] have been modified illegally (cooldown)',), Grid2OpException DivergingPowerFlow DivergingPowerFlow('GAME OVER: Powerflow has diverged during computation or a load has been disconnected or a generator has been disconnected.',)], 'rewards': {}}
ATTACK step 257: disconnected 

step 472: Survived [1] steps
Total reward [-0.15654075145721436]
Total reward opponent [0]
step 476: Survived [3] steps
Total reward [1.1406135559082031]
Total reward opponent [0]
ATTACK step 478: disconnected 18
step 478: Survived [1] steps
Total reward [1.465042233467102]
Total reward opponent [-1.0]
step 479: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 482: Survived [2] steps
Total reward [0.5158997774124146]
Total reward opponent [0]
step 488: Survived [5] steps
Total reward [2.8533897399902344]
Total reward opponent [0]
step 490: Survived [1] steps
Total reward [-0.2076730728149414]
Total reward opponent [0]
step 492: Survived [1] steps
Total reward [-0.448319673538208]
Total reward opponent [0]
step 493: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 496: Survived [2] steps
Total reward [0.45682477951049805]
Total reward opponent [0]
ATTACK step 498: disconnected 0
step 501: Survived [4] steps
Total reward [2.2263232469558716]
Tota

step 711: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 715: Survived [3] steps
Total reward [1.3093748092651367]
Total reward opponent [0]
step 716: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 717: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 719: Survived [1] steps
Total reward [-0.26376330852508545]
Total reward opponent [0]
step 721: Survived [1] steps
Total reward [-0.20497393608093262]
Total reward opponent [0]
step 724: Survived [2] steps
Total reward [0.5100200176239014]
Total reward opponent [0]
step 727: Survived [2] steps
Total reward [0.44681787490844727]
Total reward opponent [0]
loss = 19.07829
step 730: Survived [2] steps
Total reward [0.45963096618652344]
Total reward opponent [0]
step 734: Survived [3] steps
Total reward [1.0749626159667969]
Total reward opponent [0]
step 737: Survived [2] steps
Total reward [0.33296382427215576]
Total reward opponent [0]
ATTACK step 738: disconnected 6
step 739

step 987: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 993: Survived [5] steps
Total reward [2.4450786113739014]
Total reward opponent [0]
step 995: Survived [1] steps
Total reward [-0.2667757272720337]
Total reward opponent [0]
step 999: Survived [3] steps
Total reward [1.3453102111816406]
Total reward opponent [0]
Step [1000] -- Random [0.8949981880496173]
step 1001: Survived [1] steps
Total reward [-0.19566822052001953]
Total reward opponent [0]
ATTACK step 1002: disconnected 18
step 1002: Survived [0] steps
Total reward [0.6514813899993896]
Total reward opponent [-1.0]
step 1005: Survived [2] steps
Total reward [0.30945849418640137]
Total reward opponent [0]
step 1006: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
loss = 8.97258
step 1011: Survived [4] steps
Total reward [2.0078083276748657]
Total reward opponent [0]
step 1013: Survived [1] steps
Total reward [-0.19314801692962646]
Total reward opponent [0]
step 1018: Survived [4] steps
T

step 1243: Survived [2] steps
Total reward [0.6276243925094604]
Total reward opponent [0]
step 1244: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 1247: Survived [2] steps
Total reward [0.4707258939743042]
Total reward opponent [0]
step 1252: Survived [4] steps
Total reward [2.0941836833953857]
Total reward opponent [0]
step 1255: Survived [2] steps
Total reward [0.4603993892669678]
Total reward opponent [0]
step 1258: Survived [2] steps
Total reward [0.49730896949768066]
Total reward opponent [0]
step 1261: Survived [2] steps
Total reward [0.46933674812316895]
Total reward opponent [0]
step 1264: Survived [2] steps
Total reward [0.3688802719116211]
Total reward opponent [0]
step 1266: Survived [1] steps
Total reward [-0.41733527183532715]
Total reward opponent [0]
step 1267: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 1269: Survived [1] steps
Total reward [-0.21068143844604492]
Total reward opponent [0]
step 1276: Survived [6] steps
To

step 1521: Survived [2] steps
Total reward [0.5676097869873047]
Total reward opponent [0]
step 1524: Survived [2] steps
Total reward [0.4742194414138794]
Total reward opponent [0]
step 1528: Survived [3] steps
Total reward [1.4040786027908325]
Total reward opponent [0]
step 1530: Survived [1] steps
Total reward [-0.24410569667816162]
Total reward opponent [0]
step 1535: Survived [4] steps
Total reward [2.0217387676239014]
Total reward opponent [0]
step 1539: Survived [3] steps
Total reward [1.1673264503479004]
Total reward opponent [0]
step 1543: Survived [3] steps
Total reward [1.294756293296814]
Total reward opponent [0]
step 1545: Survived [1] steps
Total reward [-0.2235856056213379]
Total reward opponent [0]
ATTACK step 1546: disconnected 0
ATTACK step 1547: disconnected 15
ATTACK step 1548: disconnected 3
step 1548: Survived [2] steps
Total reward [2.3126111030578613]
Total reward opponent [0.5391758680343628]
ATTACK step 1549: disconnected 7
step 1551: Survived [2] steps
Total re

step 1800: Survived [2] steps
Total reward [0.5461902618408203]
Total reward opponent [0]
step 1801: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 1805: Survived [3] steps
Total reward [1.2327818870544434]
Total reward opponent [0]
ATTACK step 1807: disconnected 1
step 1810: Survived [4] steps
Total reward [2.1303913593292236]
Total reward opponent [0.7929683923721313]
step 1813: Survived [2] steps
Total reward [0.6119036674499512]
Total reward opponent [0]
ATTACK step 1817: disconnected 1
ATTACK step 1819: disconnected 9
step 1819: Survived [5] steps
Total reward [4.330520987510681]
Total reward opponent [-0.3009979724884033]
step 1822: Survived [2] steps
Total reward [0.5686130523681641]
Total reward opponent [0]
step 1823: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 1827: Survived [3] steps
Total reward [1.2242693901062012]
Total reward opponent [0]
step 1831: Survived [3] steps
Total reward [1.1319727897644043]
Total reward opponent

step 2079: Survived [3] steps
Total reward [1.3805757761001587]
Total reward opponent [0]
ATTACK step 2080: disconnected 2
step 2081: Survived [1] steps
Total reward [-0.18017184734344482]
Total reward opponent [0.7873320579528809]
step 2085: Survived [3] steps
Total reward [1.213998556137085]
Total reward opponent [0]
step 2091: Survived [5] steps
Total reward [2.8260806798934937]
Total reward opponent [0]
ATTACK step 2092: disconnected 6
step 2094: Survived [2] steps
Total reward [0.5464699268341064]
Total reward opponent [0.7782442569732666]
step 2096: Survived [1] steps
Total reward [-0.2878159284591675]
Total reward opponent [0]
step 2097: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
ATTACK step 2101: disconnected 11
step 2105: Survived [7] steps
Total reward [4.520612835884094]
Total reward opponent [0.7493021488189697]
step 2110: Survived [4] steps
Total reward [2.0619404315948486]
Total reward opponent [0]
ATTACK step 2112: disconnected 6
step 2113: Survived

ATTACK step 2383: disconnected 17
step 2383: Survived [3] steps
Total reward [2.9456393718719482]
Total reward opponent [-1.0]
step 2387: Survived [3] steps
Total reward [1.2136874198913574]
Total reward opponent [0]
step 2389: Survived [1] steps
Total reward [-0.2006528377532959]
Total reward opponent [0]
step 2395: Survived [5] steps
Total reward [2.821622610092163]
Total reward opponent [0]
step 2400: Survived [4] steps
Total reward [2.1313316822052]
Total reward opponent [0]
step 2403: Survived [2] steps
Total reward [0.527446985244751]
Total reward opponent [0]
loss = 9.951506
ATTACK step 2408: disconnected 14
step 2409: Survived [5] steps
Total reward [2.805020570755005]
Total reward opponent [0.7756242752075195]
ATTACK step 2410: disconnected 3
step 2414: Survived [4] steps
Total reward [2.086151599884033]
Total reward opponent [0.764930009841919]
step 2417: Survived [2] steps
Total reward [0.5328896045684814]
Total reward opponent [0]
step 2421: Survived [3] steps
Total reward 

step 2652: Survived [2] steps
Total reward [0.6105977296829224]
Total reward opponent [0]
step 2655: Survived [2] steps
Total reward [0.511383056640625]
Total reward opponent [0]
step 2657: Survived [1] steps
Total reward [-0.2449026107788086]
Total reward opponent [0]
step 2664: Survived [6] steps
Total reward [3.9145963191986084]
Total reward opponent [0]
step 2666: Survived [1] steps
Total reward [-0.22834134101867676]
Total reward opponent [0]
step 2669: Survived [2] steps
Total reward [0.3468773365020752]
Total reward opponent [0]
step 2675: Survived [5] steps
Total reward [2.930806875228882]
Total reward opponent [0]
step 2679: Survived [3] steps
Total reward [1.4752542972564697]
Total reward opponent [0]
step 2682: Survived [2] steps
Total reward [0.5658204555511475]
Total reward opponent [0]
step 2684: Survived [1] steps
Total reward [-0.2302340269088745]
Total reward opponent [0]
ATTACK step 2686: disconnected 15
loss = 2.165325
ATTACK step 2688: disconnected 10
step 2691: Sur

step 2956: Survived [1] steps
Total reward [-0.1891878843307495]
Total reward opponent [0]
step 2959: Survived [2] steps
Total reward [0.42780351638793945]
Total reward opponent [0]
step 2962: Survived [2] steps
Total reward [0.4089224338531494]
Total reward opponent [0]
ATTACK step 2963: disconnected 9
step 2964: Survived [1] steps
Total reward [-0.2342691421508789]
Total reward opponent [0.6902501583099365]
step 2967: Survived [2] steps
Total reward [0.45882654190063477]
Total reward opponent [0]
loss = 1.7287651
step 2970: Survived [2] steps
Total reward [0.5892316102981567]
Total reward opponent [0]
step 2973: Survived [2] steps
Total reward [0.5347855091094971]
Total reward opponent [0]
step 2979: Survived [5] steps
Total reward [2.781567096710205]
Total reward opponent [0]
step 2982: Survived [2] steps
Total reward [0.5519329309463501]
Total reward opponent [0]
step 2984: Survived [1] steps
Total reward [-0.20980453491210938]
Total reward opponent [0]
step 2987: Survived [2] step

step 3245: Survived [6] steps
Total reward [3.33087158203125]
Total reward opponent [0]
ATTACK step 3247: disconnected 4
loss = 1.8588524
step 3248: Survived [2] steps
Total reward [0.6014777421951294]
Total reward opponent [0.7875752449035645]
step 3251: Survived [2] steps
Total reward [0.4736844301223755]
Total reward opponent [0]
step 3256: Survived [4] steps
Total reward [2.0594969987869263]
Total reward opponent [0]
step 3262: Survived [5] steps
Total reward [2.55146062374115]
Total reward opponent [0]
step 3267: Survived [4] steps
Total reward [2.098896026611328]
Total reward opponent [0]
step 3272: Survived [4] steps
Total reward [2.008833169937134]
Total reward opponent [0]
step 3274: Survived [1] steps
Total reward [-0.24061918258666992]
Total reward opponent [0]
step 3276: Survived [1] steps
Total reward [-0.20980453491210938]
Total reward opponent [0]
step 3277: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 3279: Survived [1] steps
Total reward [-0.26

step 3558: Survived [5] steps
Total reward [2.9275636672973633]
Total reward opponent [0]
step 3562: Survived [3] steps
Total reward [1.27632474899292]
Total reward opponent [0]
step 3567: Survived [4] steps
Total reward [1.9334943294525146]
Total reward opponent [0]
step 3570: Survived [2] steps
Total reward [0.5085017681121826]
Total reward opponent [0]
ATTACK step 3574: disconnected 6
step 3574: Survived [3] steps
Total reward [3.0223846435546875]
Total reward opponent [-1.0]
ATTACK step 3577: disconnected 15
step 3577: Survived [2] steps
Total reward [2.349771022796631]
Total reward opponent [-1.0]
step 3583: Survived [5] steps
Total reward [3.0145387649536133]
Total reward opponent [0]
loss = 1.5439203
ATTACK step 3587: disconnected 5
step 3587: Survived [3] steps
Total reward [3.0943312644958496]
Total reward opponent [-1.0]
step 3589: Survived [1] steps
Total reward [-0.20487356185913086]
Total reward opponent [0]
step 3591: Survived [1] steps
Total reward [-0.20122599601745605]

step 3877: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 3881: Survived [3] steps
Total reward [1.4326714277267456]
Total reward opponent [0]
ATTACK step 3882: disconnected 0
step 3883: Survived [1] steps
Total reward [-0.2947044372558594]
Total reward opponent [0.7107498645782471]
step 3887: Survived [3] steps
Total reward [1.2685351371765137]
Total reward opponent [0]
step 3893: Survived [5] steps
Total reward [2.938276529312134]
Total reward opponent [0]
step 3899: Survived [5] steps
Total reward [2.7042925357818604]
Total reward opponent [0]
step 3906: Survived [6] steps
Total reward [3.8871012926101685]
Total reward opponent [0]
step 3909: Survived [2] steps
Total reward [0.5993238687515259]
Total reward opponent [0]
step 3912: Survived [2] steps
Total reward [0.3903031349182129]
Total reward opponent [0]
step 3915: Survived [2] steps
Total reward [0.5107214450836182]
Total reward opponent [0]
step 3917: Survived [1] steps
Total reward [-0.17620539665222168

step 4190: Survived [4] steps
Total reward [2.0925581455230713]
Total reward opponent [0]
step 4193: Survived [2] steps
Total reward [0.381000280380249]
Total reward opponent [0]
step 4199: Survived [5] steps
Total reward [2.997054934501648]
Total reward opponent [0]
loss = 3.851441
step 4202: Survived [2] steps
Total reward [0.31647276878356934]
Total reward opponent [0]
step 4204: Survived [1] steps
Total reward [-0.18654954433441162]
Total reward opponent [0]
ATTACK step 4205: disconnected 8
step 4207: Survived [2] steps
Total reward [0.6235090494155884]
Total reward opponent [0.8027744293212891]
step 4208: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 4209: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 4211: Survived [1] steps
Total reward [-0.27614665031433105]
Total reward opponent [0]
step 4214: Survived [2] steps
Total reward [0.34986162185668945]
Total reward opponent [0]
step 4216: Survived [1] steps
Total reward [-0.19619393348

step 4548: Survived [3] steps
Total reward [1.3262888193130493]
Total reward opponent [0]
step 4552: Survived [3] steps
Total reward [1.3984014987945557]
Total reward opponent [0]
step 4555: Survived [2] steps
Total reward [0.3141087293624878]
Total reward opponent [0]
step 4558: Survived [2] steps
Total reward [0.5089209079742432]
Total reward opponent [0]
step 4562: Survived [3] steps
Total reward [1.1329896450042725]
Total reward opponent [0]
step 4567: Survived [4] steps
Total reward [1.997132658958435]
Total reward opponent [0]
step 4569: Survived [1] steps
Total reward [-0.22590339183807373]
Total reward opponent [0]
step 4575: Survived [5] steps
Total reward [2.8566612005233765]
Total reward opponent [0]
step 4578: Survived [2] steps
Total reward [0.45796775817871094]
Total reward opponent [0]
step 4580: Survived [1] steps
Total reward [-0.1689438819885254]
Total reward opponent [0]
ATTACK step 4583: disconnected 18
step 4583: Survived [2] steps
Total reward [2.3671945333480835]

ATTACK step 4888: disconnected 12
step 4890: Survived [2] steps
Total reward [0.5143687725067139]
Total reward opponent [0.7404646873474121]
step 4893: Survived [2] steps
Total reward [0.48777687549591064]
Total reward opponent [0]
step 4897: Survived [3] steps
Total reward [1.2786734104156494]
Total reward opponent [0]
step 4903: Survived [5] steps
Total reward [2.952048659324646]
Total reward opponent [0]
ATTACK step 4904: disconnected 11
step 4906: Survived [2] steps
Total reward [0.5021731853485107]
Total reward opponent [0.7245602607727051]
step 4907: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 4913: Survived [5] steps
Total reward [2.678926110267639]
Total reward opponent [0]
step 4917: Survived [3] steps
Total reward [1.305332899093628]
Total reward opponent [0]
step 4923: Survived [5] steps
Total reward [2.7786449193954468]
Total reward opponent [0]
step 4926: Survived [2] steps
Total reward [0.43734681606292725]
Total reward opponent [0]
step 4927: Su

step 5240: Survived [5] steps
Total reward [2.795609712600708]
Total reward opponent [0]
step 5247: Survived [6] steps
Total reward [3.82858943939209]
Total reward opponent [0]
step 5251: Survived [3] steps
Total reward [1.2261234521865845]
Total reward opponent [0]
step 5253: Survived [1] steps
Total reward [-0.2632298469543457]
Total reward opponent [0]
step 5259: Survived [5] steps
Total reward [2.708173990249634]
Total reward opponent [0]
step 5261: Survived [1] steps
Total reward [-0.18672657012939453]
Total reward opponent [0]
step 5262: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
loss = 1.0852712
step 5265: Survived [2] steps
Total reward [0.5099852085113525]
Total reward opponent [0]
step 5266: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 5269: Survived [2] steps
Total reward [0.42304861545562744]
Total reward opponent [0]
step 5272: Survived [2] steps
Total reward [0.5695368051528931]
Total reward opponent [0]
step 5288: Survived [

ATTACK step 5572: disconnected 8
step 5574: Survived [6] steps
Total reward [3.701983332633972]
Total reward opponent [0.7803099155426025]
step 5579: Survived [4] steps
Total reward [1.9170385599136353]
Total reward opponent [0]
step 5587: Survived [7] steps
Total reward [4.409801363945007]
Total reward opponent [0]
step 5595: Survived [7] steps
Total reward [4.7004474401474]
Total reward opponent [0]
step 5597: Survived [1] steps
Total reward [-0.2686411142349243]
Total reward opponent [0]
loss = 1.0384982
step 5605: Survived [7] steps
Total reward [4.20825457572937]
Total reward opponent [0]
step 5612: Survived [6] steps
Total reward [3.6478867530822754]
Total reward opponent [0]
step 5615: Survived [2] steps
Total reward [0.558068037033081]
Total reward opponent [0]
step 5621: Survived [5] steps
Total reward [3.0008918046951294]
Total reward opponent [0]
step 5627: Survived [5] steps
Total reward [2.5420539379119873]
Total reward opponent [0]
step 5629: Survived [1] steps
Total rewa

loss = 0.7309774
step 5998: Survived [10] steps
Total reward [6.8848652839660645]
Total reward opponent [0]
step 5999: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
Step [6000] -- Random [0.4489227631566503]
Successfully saved model at: saved_agent_DDDQN_random_opponent_20000/DDDQN.h5
step 6008: Survived [8] steps
Total reward [5.378695249557495]
Total reward opponent [0]
ATTACK step 6012: disconnected 12
step 6013: Survived [4] steps
Total reward [2.1195329427719116]
Total reward opponent [0.6878873109817505]
step 6022: Survived [8] steps
Total reward [5.604054689407349]
Total reward opponent [0]
step 6030: Survived [7] steps
Total reward [4.3832138776779175]
Total reward opponent [0]
step 6035: Survived [4] steps
Total reward [2.2020697593688965]
Total reward opponent [0]
ATTACK step 6036: disconnected 5
step 6039: Survived [3] steps
Total reward [1.3079736232757568]
Total reward opponent [0.7551884651184082]
step 6042: Survived [2] steps
Total reward [0.5828783512

step 6364: Survived [2] steps
Total reward [0.6216611862182617]
Total reward opponent [0]
step 6366: Survived [1] steps
Total reward [-0.19319391250610352]
Total reward opponent [0]
step 6373: Survived [6] steps
Total reward [3.7664761543273926]
Total reward opponent [0]
step 6375: Survived [1] steps
Total reward [-0.29209280014038086]
Total reward opponent [0]
step 6380: Survived [4] steps
Total reward [2.1825579404830933]
Total reward opponent [0]
step 6381: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
loss = 0.6896899
step 6384: Survived [2] steps
Total reward [0.2448890209197998]
Total reward opponent [0]
step 6385: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 6391: Survived [5] steps
Total reward [2.9411535263061523]
Total reward opponent [0]
ATTACK step 6399: disconnected 0
step 6400: Survived [8] steps
Total reward [5.147932648658752]
Total reward opponent [0.7687177658081055]
step 6408: Survived [7] steps
Total reward [4.094675302505

step 6782: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 6786: Survived [3] steps
Total reward [1.2093831300735474]
Total reward opponent [0]
step 6791: Survived [4] steps
Total reward [2.2839664220809937]
Total reward opponent [0]
step 6797: Survived [5] steps
Total reward [2.810541272163391]
Total reward opponent [0]
step 6802: Survived [4] steps
Total reward [2.258047938346863]
Total reward opponent [0]
step 6812: Survived [9] steps
Total reward [6.077667713165283]
Total reward opponent [0]
step 6820: Survived [7] steps
Total reward [4.0150368213653564]
Total reward opponent [0]
step 6825: Survived [4] steps
Total reward [2.2793455123901367]
Total reward opponent [0]
loss = 0.2596793
ATTACK step 6832: disconnected 14
step 6834: Survived [8] steps
Total reward [5.422338962554932]
Total reward opponent [0.7501877546310425]
step 6839: Survived [4] steps
Total reward [2.043667197227478]
Total reward opponent [0]
step 6841: Survived [1] steps
Total reward [-0.1908

step 7214: Survived [4] steps
Total reward [1.9643654823303223]
Total reward opponent [0]
ATTACK step 7220: disconnected 1
step 7220: Survived [5] steps
Total reward [4.448278784751892]
Total reward opponent [-1.0]
step 7223: Survived [2] steps
Total reward [0.46194887161254883]
Total reward opponent [0]
loss = 0.2011984
step 7227: Survived [3] steps
Total reward [1.202622890472412]
Total reward opponent [0]
step 7233: Survived [5] steps
Total reward [2.7524218559265137]
Total reward opponent [0]
ATTACK step 7235: disconnected 13
step 7239: Survived [5] steps
Total reward [2.858875870704651]
Total reward opponent [0.7682905197143555]
ATTACK step 7244: disconnected 10
step 7244: Survived [4] steps
Total reward [3.81625497341156]
Total reward opponent [-1.0]
step 7246: Survived [1] steps
Total reward [-0.18347394466400146]
Total reward opponent [0]
step 7249: Survived [2] steps
Total reward [0.582017183303833]
Total reward opponent [0]
step 7252: Survived [2] steps
Total reward [0.453824

ATTACK step 7579: disconnected 18
step 7579: Survived [2] steps
Total reward [2.5066171884536743]
Total reward opponent [-1.0]
step 7582: Survived [2] steps
Total reward [0.6144417524337769]
Total reward opponent [0]
step 7587: Survived [4] steps
Total reward [1.9866702556610107]
Total reward opponent [0]
ATTACK step 7590: disconnected 4
step 7591: Survived [3] steps
Total reward [1.3307299613952637]
Total reward opponent [0.7845244407653809]
step 7592: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 7594: Survived [1] steps
Total reward [-0.17222881317138672]
Total reward opponent [0]
step 7601: Survived [6] steps
Total reward [3.653061628341675]
Total reward opponent [0]
step 7605: Survived [3] steps
Total reward [1.2854574918746948]
Total reward opponent [0]
step 7608: Survived [2] steps
Total reward [0.5551812648773193]
Total reward opponent [0]
ATTACK step 7610: disconnected 19
step 7612: Survived [3] steps
Total reward [1.2281310558319092]
Total reward oppon

step 7987: Survived [6] steps
Total reward [3.641161561012268]
Total reward opponent [0]
step 7988: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 7996: Survived [7] steps
Total reward [4.338987469673157]
Total reward opponent [0]
step 7998: Survived [1] steps
Total reward [-0.19339501857757568]
Total reward opponent [0]
Step [8000] -- Random [0.35014855147807206]
Successfully saved model at: saved_agent_DDDQN_random_opponent_20000/DDDQN.h5
step 8003: Survived [4] steps
Total reward [2.1821491718292236]
Total reward opponent [0]
loss = 0.54725176
step 8009: Survived [5] steps
Total reward [2.920233726501465]
Total reward opponent [0]
step 8013: Survived [3] steps
Total reward [1.1585445404052734]
Total reward opponent [0]
ATTACK step 8015: disconnected 8
step 8015: Survived [1] steps
Total reward [1.6327471733093262]
Total reward opponent [-1.0]
step 8018: Survived [2] steps
Total reward [0.34967339038848877]
Total reward opponent [0]
step 8024: Survived [5] step

step 8373: Survived [7] steps
Total reward [4.674936652183533]
Total reward opponent [0]
ATTACK step 8378: disconnected 8
step 8379: Survived [5] steps
Total reward [3.051358699798584]
Total reward opponent [0.7832787036895752]
step 8383: Survived [3] steps
Total reward [1.2770862579345703]
Total reward opponent [0]
step 8385: Survived [1] steps
Total reward [-0.19708776473999023]
Total reward opponent [0]
step 8391: Survived [5] steps
Total reward [2.9058738946914673]
Total reward opponent [0]
ATTACK step 8398: disconnected 7
loss = 0.19430336
step 8400: Survived [8] steps
Total reward [5.321607828140259]
Total reward opponent [0.6905927658081055]
step 8406: Survived [5] steps
Total reward [2.8757946491241455]
Total reward opponent [0]
step 8412: Survived [5] steps
Total reward [2.529812812805176]
Total reward opponent [0]
step 8413: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 8418: Survived [4] steps
Total reward [2.200258493423462]
Total reward opponent [0]

step 8863: Survived [5] steps
Total reward [2.7259751558303833]
Total reward opponent [0]
step 8869: Survived [5] steps
Total reward [2.3136168718338013]
Total reward opponent [0]
step 8879: Survived [9] steps
Total reward [6.006686329841614]
Total reward opponent [0]
step 8882: Survived [2] steps
Total reward [0.5085434913635254]
Total reward opponent [0]
step 8887: Survived [4] steps
Total reward [2.0673837661743164]
Total reward opponent [0]
step 8891: Survived [3] steps
Total reward [1.3855664730072021]
Total reward opponent [0]
step 8895: Survived [3] steps
Total reward [1.4226104021072388]
Total reward opponent [0]
step 8899: Survived [3] steps
Total reward [1.038273811340332]
Total reward opponent [0]
step 8903: Survived [3] steps
Total reward [1.3759567737579346]
Total reward opponent [0]
loss = 0.09689735
step 8906: Survived [2] steps
Total reward [0.5525035858154297]
Total reward opponent [0]
step 8911: Survived [4] steps
Total reward [2.097314953804016]
Total reward opponent

step 9304: Survived [3] steps
Total reward [1.3756526708602905]
Total reward opponent [0]
step 9305: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 9312: Survived [6] steps
Total reward [3.981827735900879]
Total reward opponent [0]
step 9319: Survived [6] steps
Total reward [3.8973162174224854]
Total reward opponent [0]
ATTACK step 9325: disconnected 19
step 9325: Survived [5] steps
Total reward [4.516813158988953]
Total reward opponent [-1.0]
step 9329: Survived [3] steps
Total reward [1.2685233354568481]
Total reward opponent [0]
ATTACK step 9333: disconnected 0
ATTACK step 9337: disconnected 19
step 9337: Survived [7] steps
Total reward [6.264599561691284]
Total reward opponent [-0.20242023468017578]
ATTACK step 9339: disconnected 17
step 9339: Survived [1] steps
Total reward [1.5934185981750488]
Total reward opponent [-1.0]
step 9343: Survived [3] steps
Total reward [1.2216318845748901]
Total reward opponent [0]
ATTACK step 9350: disconnected 15
step 9351: Su

ATTACK step 9762: disconnected 13
ATTACK step 9769: disconnected 14
step 9771: Survived [13] steps
Total reward [9.156051516532898]
Total reward opponent [1.5503349304199219]
step 9775: Survived [3] steps
Total reward [1.2472105026245117]
Total reward opponent [0]
step 9783: Survived [7] steps
Total reward [4.376369118690491]
Total reward opponent [0]
step 9788: Survived [4] steps
Total reward [2.1935207843780518]
Total reward opponent [0]
ATTACK step 9792: disconnected 10
step 9799: Survived [10] steps
Total reward [6.836291313171387]
Total reward opponent [0.784172773361206]
loss = 0.09198734
ATTACK step 9807: disconnected 11
step 9809: Survived [9] steps
Total reward [6.092692852020264]
Total reward opponent [0.7454043626785278]
ATTACK step 9819: disconnected 19
step 9819: Survived [9] steps
Total reward [7.962923288345337]
Total reward opponent [-1.0]
step 9824: Survived [4] steps
Total reward [2.2704238891601562]
Total reward opponent [0]
ATTACK step 9828: disconnected 8
step 9829

step 10234: Survived [2] steps
Total reward [0.6454062461853027]
Total reward opponent [0]
step 10237: Survived [2] steps
Total reward [0.6076596975326538]
Total reward opponent [0]
step 10242: Survived [4] steps
Total reward [2.201635479927063]
Total reward opponent [0]
loss = 0.18846671
step 10248: Survived [5] steps
Total reward [2.9706263542175293]
Total reward opponent [0]
ATTACK step 10254: disconnected 0
step 10257: Survived [8] steps
Total reward [4.786317825317383]
Total reward opponent [0.6733407974243164]
ATTACK step 10262: disconnected 18
step 10262: Survived [4] steps
Total reward [3.6596107482910156]
Total reward opponent [-1.0]
step 10265: Survived [2] steps
Total reward [0.6340881586074829]
Total reward opponent [0]
step 10270: Survived [4] steps
Total reward [1.9742798805236816]
Total reward opponent [0]
step 10278: Survived [7] steps
Total reward [4.476909041404724]
Total reward opponent [0]
step 10279: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]


step 10709: Survived [7] steps
Total reward [4.391627311706543]
Total reward opponent [0]
ATTACK step 10710: disconnected 3
step 10722: Survived [12] steps
Total reward [8.26775848865509]
Total reward opponent [0.7795259952545166]
ATTACK step 10728: disconnected 14
step 10730: Survived [7] steps
Total reward [4.568982243537903]
Total reward opponent [0.7192782163619995]
step 10732: Survived [1] steps
Total reward [-0.19738566875457764]
Total reward opponent [0]
step 10736: Survived [3] steps
Total reward [1.4318851232528687]
Total reward opponent [0]
step 10743: Survived [6] steps
Total reward [3.8727976083755493]
Total reward opponent [0]
ATTACK step 10747: disconnected 18
step 10747: Survived [3] steps
Total reward [3.1835291385650635]
Total reward opponent [-1.0]
loss = 0.12614664
step 10754: Survived [6] steps
Total reward [3.674148678779602]
Total reward opponent [0]
step 10756: Survived [1] steps
Total reward [-0.20411324501037598]
Total reward opponent [0]
step 10772: Survived [

step 11196: Survived [10] steps
Total reward [6.559946656227112]
Total reward opponent [0]
loss = 0.0401597
ATTACK step 11204: disconnected 10
step 11206: Survived [9] steps
Total reward [6.3605042695999146]
Total reward opponent [0.8037286996841431]
step 11210: Survived [3] steps
Total reward [1.3812413215637207]
Total reward opponent [0]
ATTACK step 11214: disconnected 16
step 11219: Survived [8] steps
Total reward [5.325126886367798]
Total reward opponent [0.7881183624267578]
step 11222: Survived [2] steps
Total reward [0.6296703815460205]
Total reward opponent [0]
step 11230: Survived [7] steps
Total reward [4.468920707702637]
Total reward opponent [0]
step 11234: Survived [3] steps
Total reward [1.4502589702606201]
Total reward opponent [0]
ATTACK step 11236: disconnected 14
step 11237: Survived [2] steps
Total reward [0.5120632648468018]
Total reward opponent [0.7511954307556152]
step 11247: Survived [9] steps
Total reward [6.2463825941085815]
Total reward opponent [0]
loss = 0.0

step 11656: Survived [6] steps
Total reward [3.6153687238693237]
Total reward opponent [0]
step 11659: Survived [2] steps
Total reward [0.407770037651062]
Total reward opponent [0]
ATTACK step 11662: disconnected 14
step 11662: Survived [2] steps
Total reward [2.289634108543396]
Total reward opponent [-1.0]
step 11666: Survived [3] steps
Total reward [1.428437352180481]
Total reward opponent [0]
step 11670: Survived [3] steps
Total reward [1.440334439277649]
Total reward opponent [0]
step 11675: Survived [4] steps
Total reward [2.057651996612549]
Total reward opponent [0]
step 11679: Survived [3] steps
Total reward [1.4228379726409912]
Total reward opponent [0]
step 11685: Survived [5] steps
Total reward [3.00996470451355]
Total reward opponent [0]
step 11686: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
step 11692: Survived [5] steps
Total reward [2.757017493247986]
Total reward opponent [0]
step 11698: Survived [5] steps
Total reward [3.0554620027542114]
Total rew

step 12132: Survived [7] steps
Total reward [4.750912666320801]
Total reward opponent [0]
step 12141: Survived [8] steps
Total reward [4.9382224678993225]
Total reward opponent [0]
loss = 0.13524106
ATTACK step 12152: disconnected 7
step 12154: Survived [12] steps
Total reward [8.557890057563782]
Total reward opponent [0.776371955871582]
ATTACK step 12159: disconnected 10
step 12171: Survived [16] steps
Total reward [11.638464212417603]
Total reward opponent [0.8117084503173828]
step 12174: Survived [2] steps
Total reward [0.4560967683792114]
Total reward opponent [0]
step 12186: Survived [11] steps
Total reward [8.089370250701904]
Total reward opponent [0]
step 12193: Survived [6] steps
Total reward [3.477463722229004]
Total reward opponent [0]
step 12196: Survived [2] steps
Total reward [0.5185298919677734]
Total reward opponent [0]
step 12197: Survived [0] steps
Total reward [-1.0]
Total reward opponent [0]
ATTACK step 12201: disconnected 17
step 12201: Survived [3] steps
Total rewa

step 12606: Survived [3] steps
Total reward [1.4498569965362549]
Total reward opponent [0]
step 12611: Survived [4] steps
Total reward [2.197605848312378]
Total reward opponent [0]
step 12616: Survived [4] steps
Total reward [2.193587303161621]
Total reward opponent [0]
step 12618: Survived [1] steps
Total reward [-0.1883249282836914]
Total reward opponent [0]
step 12624: Survived [5] steps
Total reward [2.8835582733154297]
Total reward opponent [0]
step 12630: Survived [5] steps
Total reward [2.9024182558059692]
Total reward opponent [0]
step 12633: Survived [2] steps
Total reward [0.47837162017822266]
Total reward opponent [0]
step 12639: Survived [5] steps
Total reward [2.8749395608901978]
Total reward opponent [0]
step 12653: Survived [13] steps
Total reward [9.88047170639038]
Total reward opponent [0]
loss = 0.038298782
step 12659: Survived [5] steps
Total reward [2.8513307571411133]
Total reward opponent [0]
ATTACK step 12668: disconnected 14
step 12670: Survived [10] steps
Total

step 13085: Survived [3] steps
Total reward [1.2651093006134033]
Total reward opponent [0]
step 13095: Survived [9] steps
Total reward [6.0766825675964355]
Total reward opponent [0]
step 13100: Survived [4] steps
Total reward [2.09838604927063]
Total reward opponent [0]
loss = 0.022411563
step 13109: Survived [8] steps
Total reward [4.992574095726013]
Total reward opponent [0]
step 13114: Survived [4] steps
Total reward [2.1527175903320312]
Total reward opponent [0]
step 13120: Survived [5] steps
Total reward [2.927236557006836]
Total reward opponent [0]
step 13131: Survived [10] steps
Total reward [6.7849966287612915]
Total reward opponent [0]
step 13136: Survived [4] steps
Total reward [1.964342713356018]
Total reward opponent [0]
step 13143: Survived [6] steps
Total reward [3.612813949584961]
Total reward opponent [0]
ATTACK step 13149: disconnected 10
step 13150: Survived [6] steps
Total reward [3.8287434577941895]
Total reward opponent [0.8188738822937012]
ATTACK step 13151: disco

ATTACK step 13681: disconnected 5
step 13693: Survived [15] steps
Total reward [10.466482520103455]
Total reward opponent [0.7626054286956787]
step 13697: Survived [3] steps
Total reward [1.2263529300689697]
Total reward opponent [0]
ATTACK step 13710: disconnected 18
step 13710: Survived [12] steps
Total reward [10.469963788986206]
Total reward opponent [-1.0]
step 13715: Survived [4] steps
Total reward [2.1990736722946167]
Total reward opponent [0]
loss = 0.020186875
step 13721: Survived [5] steps
Total reward [2.822036623954773]
Total reward opponent [0]
ATTACK step 13739: disconnected 11
step 13745: Survived [23] steps
Total reward [16.812488913536072]
Total reward opponent [0.7703661918640137]
ATTACK step 13746: disconnected 12
step 13748: Survived [2] steps
Total reward [0.5009361505508423]
Total reward opponent [0.7284713983535767]
ATTACK step 13765: disconnected 18
step 13765: Survived [16] steps
Total reward [13.974810600280762]
Total reward opponent [-1.0]
ATTACK step 13771: 

step 14252: Survived [9] steps
Total reward [5.726251840591431]
Total reward opponent [0.7499599456787109]
step 14256: Survived [3] steps
Total reward [1.4585360288619995]
Total reward opponent [0]
step 14264: Survived [7] steps
Total reward [4.420397520065308]
Total reward opponent [0]
step 14267: Survived [2] steps
Total reward [0.49255871772766113]
Total reward opponent [0]
ATTACK step 14279: disconnected 9
step 14279: Survived [11] steps
Total reward [9.85032844543457]
Total reward opponent [-1.0]
loss = 0.019160932
step 14285: Survived [5] steps
Total reward [2.716288924217224]
Total reward opponent [0]
step 14300: Survived [14] steps
Total reward [10.351104021072388]
Total reward opponent [0]
step 14303: Survived [2] steps
Total reward [0.633211612701416]
Total reward opponent [0]
ATTACK step 14319: disconnected 5
ATTACK step 14329: disconnected 9
step 14329: Survived [25] steps
Total reward [19.06701695919037]
Total reward opponent [-0.3540825843811035]
loss = 0.028708428
ATTACK

ATTACK step 14855: disconnected 3
step 14859: Survived [12] steps
Total reward [8.604267358779907]
Total reward opponent [0.73736572265625]
step 14862: Survived [2] steps
Total reward [0.5803436040878296]
Total reward opponent [0]
ATTACK step 14871: disconnected 3
step 14877: Survived [14] steps
Total reward [9.54810106754303]
Total reward opponent [0.7588226795196533]
step 14885: Survived [7] steps
Total reward [4.4988319873809814]
Total reward opponent [0]
step 14889: Survived [3] steps
Total reward [1.3088020086288452]
Total reward opponent [0]
step 14894: Survived [4] steps
Total reward [2.066009283065796]
Total reward opponent [0]
loss = 0.023981374
ATTACK step 14902: disconnected 1
step 14902: Survived [7] steps
Total reward [6.422415852546692]
Total reward opponent [-1.0]
ATTACK step 14911: disconnected 5
ATTACK step 14914: disconnected 11
ATTACK step 14919: disconnected 9
step 14919: Survived [16] steps
Total reward [13.659617066383362]
Total reward opponent [0.607275128364563]

ATTACK step 15405: disconnected 6
step 15411: Survived [8] steps
Total reward [5.222914218902588]
Total reward opponent [0.7780718803405762]
step 15430: Survived [18] steps
Total reward [13.770164132118225]
Total reward opponent [0]
ATTACK step 15441: disconnected 12
step 15442: Survived [11] steps
Total reward [8.06101381778717]
Total reward opponent [0.758448600769043]
step 15448: Survived [5] steps
Total reward [2.848923683166504]
Total reward opponent [0]
loss = 0.017732646
ATTACK step 15461: disconnected 4
ATTACK step 15475: disconnected 15
step 15477: Survived [28] steps
Total reward [21.9837543964386]
Total reward opponent [1.6378138065338135]
step 15480: Survived [2] steps
Total reward [0.5051528215408325]
Total reward opponent [0]
step 15486: Survived [5] steps
Total reward [2.769253373146057]
Total reward opponent [0]
ATTACK step 15489: disconnected 14
step 15491: Survived [4] steps
Total reward [1.9704183340072632]
Total reward opponent [0.7732648849487305]
ATTACK step 15493

ATTACK step 15917: disconnected 7
step 15919: Survived [2] steps
Total reward [0.5043385028839111]
Total reward opponent [0.7367141246795654]
ATTACK step 15920: disconnected 8
step 15922: Survived [2] steps
Total reward [0.5179301500320435]
Total reward opponent [0.7516670227050781]
step 15939: Survived [16] steps
Total reward [11.137783646583557]
Total reward opponent [0]
ATTACK step 15942: disconnected 2
step 15943: Survived [3] steps
Total reward [1.2545838356018066]
Total reward opponent [0.7077381610870361]
step 15957: Survived [13] steps
Total reward [9.181777477264404]
Total reward opponent [0]
loss = 0.018239122
ATTACK step 15963: disconnected 12
step 15965: Survived [7] steps
Total reward [4.678830623626709]
Total reward opponent [0.769209623336792]
step 15977: Survived [11] steps
Total reward [7.528575658798218]
Total reward opponent [0]
ATTACK step 15985: disconnected 11
step 15985: Survived [7] steps
Total reward [6.198492646217346]
Total reward opponent [-1.0]
step 15997: 

step 16452: Survived [4] steps
Total reward [1.98287832736969]
Total reward opponent [0]
ATTACK step 16455: disconnected 0
step 16457: Survived [4] steps
Total reward [2.2769935131073]
Total reward opponent [0.8333454132080078]
loss = 0.02543609
ATTACK step 16471: disconnected 14
step 16473: Survived [15] steps
Total reward [11.269639730453491]
Total reward opponent [0.8466073274612427]
step 16479: Survived [5] steps
Total reward [2.886810302734375]
Total reward opponent [0]
step 16485: Survived [5] steps
Total reward [2.880497932434082]
Total reward opponent [0]
step 16493: Survived [7] steps
Total reward [4.741690516471863]
Total reward opponent [0]
step 16503: Survived [9] steps
Total reward [6.237051248550415]
Total reward opponent [0]
ATTACK step 16505: disconnected 19
step 16507: Survived [3] steps
Total reward [1.2661938667297363]
Total reward opponent [0.7260206937789917]
ATTACK step 16512: disconnected 13
step 16518: Survived [10] steps
Total reward [6.978721022605896]
Total r

step 17153: Survived [2] steps
Total reward [0.470255970954895]
Total reward opponent [0]
ATTACK step 17154: disconnected 17
step 17154: Survived [0] steps
Total reward [0.7825394868850708]
Total reward opponent [-1.0]
step 17163: Survived [8] steps
Total reward [5.249143600463867]
Total reward opponent [0]
ATTACK step 17182: disconnected 17
step 17182: Survived [18] steps
Total reward [15.531875014305115]
Total reward opponent [-1.0]
ATTACK step 17184: disconnected 17
step 17184: Survived [1] steps
Total reward [1.5421440601348877]
Total reward opponent [-1.0]
loss = 0.0036333231
ATTACK step 17199: disconnected 15
step 17202: Survived [17] steps
Total reward [12.700128078460693]
Total reward opponent [0.7843942642211914]
ATTACK step 17213: disconnected 10
step 17217: Survived [14] steps
Total reward [11.01224684715271]
Total reward opponent [0.8767356872558594]
ATTACK step 17221: disconnected 19
step 17222: Survived [4] steps
Total reward [2.1078110933303833]
Total reward opponent [0.

ATTACK step 17847: disconnected 0
step 17848: Survived [3] steps
Total reward [1.4412683248519897]
Total reward opponent [0.8227373361587524]
step 17856: Survived [7] steps
Total reward [4.503973126411438]
Total reward opponent [0]
ATTACK step 17857: disconnected 13
loss = 0.0024910846
ATTACK step 17870: disconnected 8
step 17870: Survived [13] steps
Total reward [11.37525761127472]
Total reward opponent [-0.20047712326049805]
ATTACK step 17885: disconnected 0
step 17889: Survived [18] steps
Total reward [13.695749163627625]
Total reward opponent [0.8200428485870361]
ATTACK step 17900: disconnected 19
step 17903: Survived [13] steps
Total reward [9.361604928970337]
Total reward opponent [0.7625312805175781]
step 17915: Survived [11] steps
Total reward [7.816784024238586]
Total reward opponent [0]
step 17918: Survived [2] steps
Total reward [0.2806987762451172]
Total reward opponent [0]
loss = 0.0041233376
ATTACK step 17924: disconnected 2
step 17931: Survived [12] steps
Total reward [8

step 18526: Survived [5] steps
Total reward [2.8671579360961914]
Total reward opponent [0]
step 18532: Survived [5] steps
Total reward [2.6861053705215454]
Total reward opponent [0]
loss = 0.0007447018
step 18538: Survived [5] steps
Total reward [2.99481463432312]
Total reward opponent [0]
step 18543: Survived [4] steps
Total reward [2.017834424972534]
Total reward opponent [0]
step 18549: Survived [5] steps
Total reward [2.6829845905303955]
Total reward opponent [0]
step 18555: Survived [5] steps
Total reward [2.8128527402877808]
Total reward opponent [0]
step 18561: Survived [5] steps
Total reward [2.7294256687164307]
Total reward opponent [0]
step 18567: Survived [5] steps
Total reward [2.9192187786102295]
Total reward opponent [0]
step 18573: Survived [5] steps
Total reward [3.0205997228622437]
Total reward opponent [0]
step 18579: Survived [5] steps
Total reward [2.7193297147750854]
Total reward opponent [0]
ATTACK step 18580: disconnected 5
step 18585: Survived [5] steps
Total re

ATTACK step 19028: disconnected 14
step 19030: Survived [4] steps
Total reward [2.132327437400818]
Total reward opponent [0.7672491073608398]
ATTACK step 19038: disconnected 3
step 19038: Survived [7] steps
Total reward [6.31377100944519]
Total reward opponent [-1.0]
loss = 0.0033198441
step 19050: Survived [11] steps
Total reward [7.911637902259827]
Total reward opponent [0]
ATTACK step 19051: disconnected 5
ATTACK step 19062: disconnected 9
step 19064: Survived [13] steps
Total reward [9.124065518379211]
Total reward opponent [1.4581565856933594]
ATTACK step 19076: disconnected 15
step 19078: Survived [13] steps
Total reward [9.007124662399292]
Total reward opponent [0.7739920616149902]
ATTACK step 19085: disconnected 2
step 19085: Survived [6] steps
Total reward [5.65505313873291]
Total reward opponent [-1.0]
loss = 0.0019253247
ATTACK step 19097: disconnected 3
ATTACK step 19100: disconnected 18
step 19100: Survived [14] steps
Total reward [12.230036854743958]
Total reward opponent

126 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException AmbiguousAction InvalidRedispatching InvalidRedispatching('You cannot ask for a dispatch higher than pmax - pmin  [it would be always invalid because, even if the sepoint is pmin, this dispatch would set it to a number higher than pmax, which is impossible]. Invalid dispatch for generator(s): [1]',)], 'rewards': {}}
step 19466: Survived [52] steps
Total reward [35.41743344068527]
Total reward opponent [2.454618811607361]
step 19472: Survived [5] steps
Total reward [2.9550479650497437]
Total reward opponent [0]
ATTACK step 19487: disconnected 8
loss = 0.0023176663
step 19488: Survived [15]

ATTACK step 20208: disconnected 8
step 20210: Survived [2] steps
Total reward [0.5547575950622559]
Total reward opponent [0.7663624286651611]
loss = 0.0015493416
ATTACK step 20220: disconnected 2
step 20224: Survived [13] steps
Total reward [9.686258554458618]
Total reward opponent [0.8187801837921143]
step 20232: Survived [7] steps
Total reward [4.206245183944702]
Total reward opponent [0]
ATTACK step 20244: disconnected 14
step 20246: Survived [13] steps
Total reward [9.3363938331604]
Total reward opponent [0.8110084533691406]
step 20254: Survived [7] steps
Total reward [4.290820837020874]
Total reward opponent [0]
Successfully saved model at: saved_agent_DDDQN_random_opponent_20000/DDDQN.h5


## Evaluation

In [111]:
nb_episodesode = 10 # number of episodes to evaluate
log_path = './logs-evals'
nb_process = 1 # number of cores to use
max_iter = 150 # maximum number of steps per scenario
verbose = True
save_gif = False

In [114]:
env_name = "rte_case14_realistic"
env = make(env_name, reward_class=L2RPNSandBoxScore,
           other_rewards={
               "reward": L2RPNReward
           })

agent = DoubleDuelingDQN(env.observation_space, env.action_space, name='D3QN', is_training=False)
agent.load('./saved_agent_DDDQN_random_opponent_20000/DDDQN.h5')
    
runner_params = env.get_params_for_runner()
runner_params["verbose"] = False
runner = Runner(**runner_params, agentClass=None, agentInstance=agent)
    
res = runner.run(path_save=log_path, nb_episode=nb_episode, nb_process=nb_process, max_iter=150)
if verbose:
    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        msg_tmp = "chronics at: {}".format(chron_name)
        msg_tmp += "\ttotal reward: {:.6f}".format(cum_reward)
        msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step,
                                                        max_ts)
        print(msg_tmp)

if save_gif:
    save_log_gif(log_path, res)

Agent action size: 141
Successfully loaded network from: ./saved_agent_DDDQN_random_opponent_20000/DDDQN.h5
Evaluation summary:
chronics at: 000	total reward: 134757.593750	time steps: 23/150
chronics at: 001	total reward: 55964.132812	time steps: 16/150
chronics at: 002	total reward: 699308.500000	time steps: 78/150
chronics at: 003	total reward: 97429.289062	time steps: 20/150
chronics at: 004	total reward: 148633.250000	time steps: 24/150
chronics at: 005	total reward: 179308.546875	time steps: 26/150
chronics at: 006	total reward: 20127.648438	time steps: 14/150
chronics at: 007	total reward: 179776.546875	time steps: 26/150
chronics at: 008	total reward: 121001.671875	time steps: 22/150
chronics at: 009	total reward: 1171.987549	time steps: 6/150


NameError: name 'save_gif' is not defined