In [2]:
import os
import json
import math
import numpy as np
import tensorflow as tf
import torch

import grid2op
from grid2op.Reward import *
from grid2op.Action import *

%run d3qn.ipynb
%run opponent.ipynb

In [3]:
def train_d3qn_against_no_opponent(env, agent, num_pre_training_steps, n_iter,
                                   save_path, log_path, save_freq=2000):
    """Run the D3QN training loop for ``agent`` on ``env`` (no opponent).

    The loop runs ``num_pre_training_steps + n_iter`` environment steps:

    * steps ``[0, num_pre_training_steps)`` only take random actions so that
      the experience buffer gets filled;
    * from step ``num_pre_training_steps`` on, actions are epsilon-greedy,
      epsilon is decayed every step, and the network is trained every
      ``cfg.UPDATE_FREQ`` steps (with optional soft / hard target updates).

    Relies on the module-level names ``cfg``, ``tf``, ``np`` and ``os``
    (``cfg`` is not defined in this cell -- presumably it comes from the
    ``%run d3qn.ipynb`` import cell; TODO confirm).

    Parameters
    ----------
    env
        Environment exposing ``reset()`` and ``step(act)``; ``step`` must
        return ``(obs, reward, done, info)`` where ``info`` contains the
        ``is_illegal``, ``is_ambiguous``, ``is_dispatching_illegal`` and
        ``is_illegal_reco`` flags.
    agent
        The agent to train. Its networks, replay buffer, frame stacks,
        epsilon and bookkeeping lists are mutated in place.
    num_pre_training_steps : int
        Requested number of random warm-up steps. Raised to
        ``agent.batch_size * agent.num_frames`` if smaller.
    n_iter : int
        Number of training steps performed after the warm-up.
    save_path : str
        Directory (created if missing) receiving ``<agent.name>.h5`` and the
        hyper-parameters written by ``agent._save_hyperparameters``.
    log_path : str
        Parent directory of the TensorFlow summary directory
        (``<log_path>/<agent.name>``).
    save_freq : int, optional
        Checkpoint the model every ``save_freq`` steps (default 2000).
        A value ``<= 0`` disables periodic checkpoints; the model is still
        saved once after the last step.

    Returns
    -------
    None
    """
    # Make sure we can fill the experience buffer
    min_pre_training_steps = agent.batch_size * agent.num_frames
    if num_pre_training_steps < min_pre_training_steps:
        num_pre_training_steps = min_pre_training_steps

    # Loop vars
    num_training_steps = n_iter
    num_steps = num_pre_training_steps + num_training_steps
    step = 0
    alive_steps = 0
    total_reward = 0
    agent.done = True  # forces an env.reset() on the first iteration
    print(f"Total number of steps: {num_steps}")

    # Create file system related vars
    logpath = os.path.join(log_path, agent.name)
    os.makedirs(save_path, exist_ok=True)
    modelpath = os.path.join(save_path, agent.name + ".h5")
    agent.tf_writer = tf.summary.create_file_writer(logpath, name=agent.name)
    agent._save_hyperparameters(save_path, env, num_steps)

    while step < num_steps:
        # Step `num_pre_training_steps` is training step 0 (see
        # `training_step` below), so both the action policy and the training
        # branch must switch phase on the very same boundary.
        in_training_phase = step >= num_pre_training_steps

        # Init first time or new episode
        if agent.done:
            new_obs = env.reset() # This shouldn't raise
            agent.reset(new_obs)
        if cfg.VERBOSE and step % 1000 == 0:
            print("Step [{}] -- Random [{}]".format(step, agent.epsilon))

        # Save current observation to stacking buffer
        agent._save_current_frame(agent.state)

        # Choose an action
        if not in_training_phase:
            # Warm-up: pure exploration to populate the experience buffer
            a = agent.Qmain.random_move()
        elif np.random.rand(1) < agent.epsilon:
            # Epsilon-greedy exploration
            a = agent.Qmain.random_move()
        elif len(agent.frames) < agent.num_frames:
            a = 0 # Do nothing (not enough stacked frames to query the net)
        else:
            a, _ = agent.Qmain.predict_move(np.array(agent.frames))

        # Convert it to a valid action
        act = agent.convert_act(a)
        # Execute action
        new_obs, reward, agent.done, info = env.step(act)
        new_state = agent.convert_obs(new_obs)
        # Report actions rejected by the environment when running verbosely
        if info["is_illegal"] or info["is_ambiguous"] or \
           info["is_dispatching_illegal"] or info["is_illegal_reco"]:
            if cfg.VERBOSE:
                print (a, info)
        total_reward += reward

        # Save new observation to stacking buffer
        agent._save_next_frame(new_state)

        # Save to experience buffer (only once the next-frame stack is full)
        if len(agent.frames2) == agent.num_frames:
            agent.per_buffer.add(np.array(agent.frames),
                                 a, reward,
                                 np.array(agent.frames2),
                                 agent.done)

        # Perform training when we have enough experience in buffer
        if in_training_phase:
            training_step = step - num_pre_training_steps
            # Decay chance of random action
            agent.epsilon = agent._adaptive_epsilon_decay(training_step)

            # Perform training at given frequency
            if step % cfg.UPDATE_FREQ == 0 and \
               len(agent.per_buffer) >= agent.batch_size:
                # Perform training
                agent._batch_train(training_step, step)

                if cfg.UPDATE_TARGET_SOFT_TAU > 0.0:
                    tau = cfg.UPDATE_TARGET_SOFT_TAU
                    # Update target network towards primary network
                    agent.Qmain.update_target_soft(agent.Qtarget.model, tau)

            # Every UPDATE_TARGET_HARD_FREQ trainings, update target completely
            if cfg.UPDATE_TARGET_HARD_FREQ > 0 and \
               step % (cfg.UPDATE_FREQ * cfg.UPDATE_TARGET_HARD_FREQ) == 0:
                agent.Qmain.update_target_hard(agent.Qtarget.model)

        if agent.done:
            # Episode finished: record its statistics and reset the counters.
            # `alive_steps` only counts steps that did not end the episode.
            agent.epoch_rewards.append(total_reward)
            agent.epoch_alive.append(alive_steps)
            if cfg.VERBOSE and step > num_pre_training_steps:
                print("step {}: Survived [{}] steps".format(step, alive_steps))
                print("Total reward [{}]".format(total_reward))
            alive_steps = 0
            total_reward = 0
        else:
            alive_steps += 1

        ######## After Each Step #######
        # Periodic checkpoint, every `save_freq` steps (disabled if <= 0)
        if save_freq > 0 and step > 0 and step % save_freq == 0:
            agent.save(modelpath)
        step += 1
        # Make new obs the current obs
        agent.obs = new_obs
        agent.state = new_state

    # Save model after all steps
    agent.save(modelpath)

In [5]:
# Agent hyper-parameters
num_pre_training_steps = 256  # random warm-up steps before training starts
learning_rate = 1e-4
initial_epsilon = 0.99
final_epsilon = 0.01
decay_epsilon = 20000

# Training parameters
n_iter = 20000
env_name = "rte_case14_realistic"
# Use the explicitly imported `grid2op` package rather than a bare `make`,
# which the import cell does not bring into scope by itself.
env = grid2op.make(env_name, reward_class=CombinedScaledReward)

# Register custom reward for training
cr = env._reward_helper.template_reward
#cr.addReward("overflow", CloseToOverflowReward(), 1.0)
cr.addReward("game", GameplayReward(), 1.0)
#cr.addReward("recolines", LinesReconnectedReward(), 1.0)
cr.addReward("l2rpn", L2RPNReward(), 2.0/float(env.n_line))
# Initialize custom rewards
cr.initialize(env)
# Set reward range to something manageable
cr.set_range(-1.0, 1.0)

# Output locations: model checkpoints and TensorFlow summaries
agent_name = "DDDQN"
save_path = "saved_agent_DDDQN_no_opponent_{}".format(n_iter)
log_path="tf_logs_DDDQN"

agent = DoubleDuelingDQN(env.observation_space, env.action_space, name=agent_name,
                         is_training=True, learning_rate=learning_rate,
                         initial_epsilon=initial_epsilon, final_epsilon=final_epsilon, decay_epsilon=decay_epsilon)

# Runs num_pre_training_steps + n_iter environment steps (long-running cell)
train_d3qn_against_no_opponent(env, agent, num_pre_training_steps, n_iter,
                               save_path, log_path)

Agent action size: 141
Total number of steps: 20256
Step [0] -- Random [0.99]
step 257: Survived [2] steps
Total reward [0.4758448600769043]
step 260: Survived [2] steps
Total reward [0.5162427425384521]
step 262: Survived [1] steps
Total reward [-0.22687530517578125]
step 263: Survived [0] steps
Total reward [-1.0]
step 269: Survived [5] steps
Total reward [2.5897278785705566]
loss = 6839.189
step 280: Survived [10] steps
Total reward [7.117826581001282]
step 281: Survived [0] steps
Total reward [-1.0]
step 283: Survived [1] steps
Total reward [-0.1944117546081543]
step 285: Survived [1] steps
Total reward [-0.15723586082458496]
step 286: Survived [0] steps
Total reward [-1.0]
step 289: Survived [2] steps
Total reward [0.4938105344772339]
step 291: Survived [1] steps
Total reward [-0.2002314329147339]
step 294: Survived [2] steps
Total reward [0.4412109851837158]
step 300: Survived [5] steps
Total reward [2.792956829071045]
step 306: Survived [5] steps
Total reward [2.829564690589905]

step 638: Survived [0] steps
Total reward [-1.0]
step 641: Survived [2] steps
Total reward [0.3148399591445923]
step 644: Survived [2] steps
Total reward [0.6524664163589478]
step 646: Survived [1] steps
Total reward [-0.23400557041168213]
step 650: Survived [3] steps
Total reward [1.2080037593841553]
step 651: Survived [0] steps
Total reward [-1.0]
step 655: Survived [3] steps
Total reward [1.3051862716674805]
step 661: Survived [5] steps
Total reward [2.8659268617630005]
step 665: Survived [3] steps
Total reward [1.4436231851577759]
step 670: Survived [4] steps
Total reward [2.187295436859131]
step 671: Survived [0] steps
Total reward [-1.0]
loss = 103.28259
step 672: Survived [0] steps
Total reward [-1.0]
step 677: Survived [4] steps
Total reward [2.2079498767852783]
step 681: Survived [3] steps
Total reward [0.7469242215156555]
step 682: Survived [0] steps
Total reward [-1.0]
step 684: Survived [1] steps
Total reward [-0.2564300298690796]
step 687: Survived [2] steps
Total reward [

step 1036: Survived [5] steps
Total reward [2.8560553789138794]
step 1037: Survived [0] steps
Total reward [-1.0]
step 1041: Survived [3] steps
Total reward [1.3659497499465942]
step 1044: Survived [2] steps
Total reward [0.463711142539978]
step 1049: Survived [4] steps
Total reward [2.2953548431396484]
step 1054: Survived [4] steps
Total reward [1.9092825651168823]
step 1055: Survived [0] steps
Total reward [-1.0]
step 1060: Survived [4] steps
Total reward [1.7924312353134155]
step 1063: Survived [2] steps
Total reward [0.2592122554779053]
loss = 21.177113
step 1066: Survived [2] steps
Total reward [0.6230053901672363]
step 1068: Survived [1] steps
Total reward [-0.18503832817077637]
step 1071: Survived [2] steps
Total reward [0.46353018283843994]
step 1074: Survived [2] steps
Total reward [0.36610865592956543]
step 1083: Survived [8] steps
Total reward [5.614491820335388]
step 1084: Survived [0] steps
Total reward [-1.0]
step 1087: Survived [2] steps
Total reward [0.545758843421936]


step 1442: Survived [4] steps
Total reward [2.151926875114441]
step 1447: Survived [4] steps
Total reward [2.0646060705184937]
step 1449: Survived [1] steps
Total reward [-0.3160921335220337]
step 1452: Survived [2] steps
Total reward [0.47760283946990967]
loss = 20.266653
step 1456: Survived [3] steps
Total reward [1.2421305179595947]
step 1457: Survived [0] steps
Total reward [-1.0]
step 1460: Survived [2] steps
Total reward [0.4466984272003174]
step 1462: Survived [1] steps
Total reward [-0.18335425853729248]
step 1463: Survived [0] steps
Total reward [-1.0]
step 1464: Survived [0] steps
Total reward [-1.0]
step 1468: Survived [3] steps
Total reward [1.3160704374313354]
step 1471: Survived [2] steps
Total reward [0.5843439102172852]
step 1475: Survived [3] steps
Total reward [1.313818335533142]
step 1479: Survived [3] steps
Total reward [1.3316068649291992]
step 1481: Survived [1] steps
Total reward [-0.1960461139678955]
step 1483: Survived [1] steps
Total reward [-0.242165327072143

step 1846: Survived [1] steps
Total reward [-0.17377448081970215]
loss = 13.484483
step 1848: Survived [1] steps
Total reward [-0.2337939739227295]
step 1853: Survived [4] steps
Total reward [2.1071972846984863]
step 1855: Survived [1] steps
Total reward [-0.181196928024292]
step 1863: Survived [7] steps
Total reward [4.44002377986908]
step 1864: Survived [0] steps
Total reward [-1.0]
step 1867: Survived [2] steps
Total reward [0.43021678924560547]
step 1871: Survived [3] steps
Total reward [1.2044025659561157]
step 1875: Survived [3] steps
Total reward [1.3048458099365234]
step 1878: Survived [2] steps
Total reward [0.06549906730651855]
step 1881: Survived [2] steps
Total reward [0.5098645687103271]
step 1884: Survived [2] steps
Total reward [0.47810590267181396]
step 1889: Survived [4] steps
Total reward [2.085319757461548]
step 1892: Survived [2] steps
Total reward [0.41849982738494873]
step 1896: Survived [3] steps
Total reward [1.3851933479309082]
step 1900: Survived [3] steps
Tot

step 2309: Survived [3] steps
Total reward [1.3569972515106201]
step 2312: Survived [2] steps
Total reward [0.4619009494781494]
step 2313: Survived [0] steps
Total reward [-1.0]
step 2316: Survived [2] steps
Total reward [0.5874497890472412]
step 2321: Survived [4] steps
Total reward [2.1684634685516357]
step 2324: Survived [2] steps
Total reward [0.517040491104126]
step 2329: Survived [4] steps
Total reward [2.154364824295044]
step 2330: Survived [0] steps
Total reward [-1.0]
step 2335: Survived [4] steps
Total reward [2.0312952995300293]
step 2341: Survived [5] steps
Total reward [2.6481040716171265]
step 2344: Survived [2] steps
Total reward [0.519942045211792]
loss = 2.95331
step 2353: Survived [8] steps
Total reward [5.028199195861816]
step 2358: Survived [4] steps
Total reward [1.7014440298080444]
step 2361: Survived [2] steps
Total reward [0.5177520513534546]
step 2366: Survived [4] steps
Total reward [2.2736687660217285]
step 2370: Survived [3] steps
Total reward [1.27131128311

step 2769: Survived [7] steps
Total reward [3.834242582321167]
step 2770: Survived [0] steps
Total reward [-1.0]
step 2775: Survived [4] steps
Total reward [2.204305648803711]
step 2776: Survived [0] steps
Total reward [-1.0]
step 2784: Survived [7] steps
Total reward [4.54250168800354]
step 2795: Survived [10] steps
Total reward [6.729552507400513]
loss = 2.8629918
step 2801: Survived [5] steps
Total reward [3.117176651954651]
step 2805: Survived [3] steps
Total reward [1.397620439529419]
step 2808: Survived [2] steps
Total reward [0.3341219425201416]
step 2814: Survived [5] steps
Total reward [2.7317702770233154]
step 2820: Survived [5] steps
Total reward [2.762086033821106]
step 2826: Survived [5] steps
Total reward [2.6706297397613525]
step 2832: Survived [5] steps
Total reward [2.88908851146698]
step 2835: Survived [2] steps
Total reward [0.5879876613616943]
step 2836: Survived [0] steps
Total reward [-1.0]
step 2839: Survived [2] steps
Total reward [0.5190857648849487]
step 2852:

step 3232: Survived [3] steps
Total reward [1.3673765659332275]
step 3235: Survived [2] steps
Total reward [0.5207674503326416]
step 3240: Survived [4] steps
Total reward [2.191148519515991]
step 3242: Survived [1] steps
Total reward [-0.2055220603942871]
step 3243: Survived [0] steps
Total reward [-1.0]
step 3246: Survived [2] steps
Total reward [0.5075645446777344]
step 3247: Survived [0] steps
Total reward [-1.0]
loss = 3.2020552
step 3251: Survived [3] steps
Total reward [1.354177713394165]
step 3253: Survived [1] steps
Total reward [-0.2379751205444336]
step 3256: Survived [2] steps
Total reward [0.5538504123687744]
step 3257: Survived [0] steps
Total reward [-1.0]
step 3260: Survived [2] steps
Total reward [0.6258705854415894]
step 3263: Survived [2] steps
Total reward [0.5478197336196899]
step 3270: Survived [6] steps
Total reward [3.430999755859375]
step 3274: Survived [3] steps
Total reward [1.3453866243362427]
step 3278: Survived [3] steps
Total reward [1.2876101732254028]
st

step 3694: Survived [2] steps
Total reward [0.4817056655883789]
loss = 1.6048537
step 3697: Survived [2] steps
Total reward [0.35699570178985596]
step 3704: Survived [6] steps
Total reward [3.3281078338623047]
step 3707: Survived [2] steps
Total reward [0.5117630958557129]
step 3710: Survived [2] steps
Total reward [0.5342633724212646]
step 3715: Survived [4] steps
Total reward [2.073902130126953]
step 3718: Survived [2] steps
Total reward [0.5074279308319092]
step 3721: Survived [2] steps
Total reward [0.5595148801803589]
step 3725: Survived [3] steps
Total reward [1.2925158739089966]
step 3731: Survived [5] steps
Total reward [3.168239116668701]
step 3737: Survived [5] steps
Total reward [2.578926920890808]
step 3741: Survived [3] steps
Total reward [1.4674897193908691]
step 3745: Survived [3] steps
Total reward [1.2440695762634277]
step 3749: Survived [3] steps
Total reward [0.93499755859375]
step 3751: Survived [1] steps
Total reward [-0.17435097694396973]
loss = 1.5309234
step 375

step 4158: Survived [1] steps
Total reward [-0.18851184844970703]
step 4161: Survived [2] steps
Total reward [0.5019748210906982]
step 4166: Survived [4] steps
Total reward [1.9590680599212646]
step 4167: Survived [0] steps
Total reward [-1.0]
step 4168: Survived [0] steps
Total reward [-1.0]
step 4171: Survived [2] steps
Total reward [0.2924168109893799]
step 4175: Survived [3] steps
Total reward [1.4266711473464966]
step 4179: Survived [3] steps
Total reward [1.296147346496582]
step 4183: Survived [3] steps
Total reward [1.4279987812042236]
step 4189: Survived [5] steps
Total reward [2.941389799118042]
step 4194: Survived [4] steps
Total reward [2.2989310026168823]
step 4196: Survived [1] steps
Total reward [-0.17350125312805176]
loss = 1.2582304
step 4200: Survived [3] steps
Total reward [1.2144720554351807]
step 4201: Survived [0] steps
Total reward [-1.0]
step 4202: Survived [0] steps
Total reward [-1.0]
step 4206: Survived [3] steps
Total reward [1.482151985168457]
step 4208: Sur

step 4666: Survived [5] steps
Total reward [2.8729406595230103]
step 4672: Survived [5] steps
Total reward [2.785174012184143]
step 4677: Survived [4] steps
Total reward [2.122851610183716]
step 4681: Survived [3] steps
Total reward [1.4399949312210083]
step 4685: Survived [3] steps
Total reward [1.146043300628662]
step 4687: Survived [1] steps
Total reward [-0.19300305843353271]
step 4692: Survived [4] steps
Total reward [2.1243393421173096]
step 4698: Survived [5] steps
Total reward [2.9965109825134277]
loss = 1.7681296
step 4706: Survived [7] steps
Total reward [4.652407288551331]
step 4709: Survived [2] steps
Total reward [0.5667072534561157]
step 4714: Survived [4] steps
Total reward [1.8967913389205933]
step 4719: Survived [4] steps
Total reward [1.8313945531845093]
step 4725: Survived [5] steps
Total reward [3.13464891910553]
step 4729: Survived [3] steps
Total reward [1.346008539199829]
step 4733: Survived [3] steps
Total reward [1.3785181045532227]
step 4734: Survived [0] step

step 5149: Survived [5] steps
Total reward [2.7792201042175293]
step 5151: Survived [1] steps
Total reward [-0.23515701293945312]
loss = 0.57602644
step 5156: Survived [4] steps
Total reward [2.1378517150878906]
step 5160: Survived [3] steps
Total reward [1.3061100244522095]
step 5166: Survived [5] steps
Total reward [2.924337863922119]
step 5169: Survived [2] steps
Total reward [0.5796951055526733]
step 5174: Survived [4] steps
Total reward [2.10422682762146]
step 5179: Survived [4] steps
Total reward [2.110174298286438]
step 5183: Survived [3] steps
Total reward [1.2187387943267822]
step 5187: Survived [3] steps
Total reward [1.416122555732727]
step 5189: Survived [1] steps
Total reward [-0.190842866897583]
step 5193: Survived [3] steps
Total reward [0.8761550188064575]
step 5199: Survived [5] steps
Total reward [2.809068441390991]
step 5206: Survived [6] steps
Total reward [3.735543727874756]
loss = 0.70080703
step 5209: Survived [2] steps
Total reward [0.550767183303833]
step 5212:

step 5819: Survived [4] steps
Total reward [2.2256330251693726]
step 5821: Survived [1] steps
Total reward [-0.2108842134475708]
loss = 1.1316055
step 5826: Survived [4] steps
Total reward [2.1843401193618774]
step 5833: Survived [6] steps
Total reward [3.6476203203201294]
step 5834: Survived [0] steps
Total reward [-1.0]
step 5835: Survived [0] steps
Total reward [-1.0]
step 5843: Survived [7] steps
Total reward [4.570396900177002]
step 5849: Survived [5] steps
Total reward [3.0696821212768555]
step 5857: Survived [7] steps
Total reward [4.6251702308654785]
step 5859: Survived [1] steps
Total reward [-0.1990211009979248]
step 5860: Survived [0] steps
Total reward [-1.0]
step 5869: Survived [8] steps
Total reward [5.2452181577682495]
step 5875: Survived [5] steps
Total reward [2.7278361320495605]
loss = 0.631388
step 5882: Survived [6] steps
Total reward [3.7295374870300293]
step 5883: Survived [0] steps
Total reward [-1.0]
step 5888: Survived [4] steps
Total reward [2.19302761554718]


step 6460: Survived [3] steps
Total reward [1.3221304416656494]
step 6461: Survived [0] steps
Total reward [-1.0]
step 6465: Survived [3] steps
Total reward [1.4668242931365967]
step 6469: Survived [3] steps
Total reward [1.346459984779358]
step 6473: Survived [3] steps
Total reward [1.416751742362976]
step 6476: Survived [2] steps
Total reward [0.5256680250167847]
step 6480: Survived [3] steps
Total reward [1.4096510410308838]
step 6485: Survived [4] steps
Total reward [2.0474205017089844]
step 6488: Survived [2] steps
Total reward [0.5485802888870239]
step 6492: Survived [3] steps
Total reward [1.2315764427185059]
loss = 0.622751
step 6496: Survived [3] steps
Total reward [1.2932617664337158]
step 6501: Survived [4] steps
Total reward [1.8409632444381714]
step 6505: Survived [3] steps
Total reward [1.44883131980896]
step 6509: Survived [3] steps
Total reward [1.312687873840332]
step 6513: Survived [3] steps
Total reward [1.184333324432373]
step 6518: Survived [4] steps
Total reward [

step 7024: Survived [1] steps
Total reward [-0.24123382568359375]
step 7027: Survived [2] steps
Total reward [0.6039412021636963]
step 7034: Survived [6] steps
Total reward [3.586214542388916]
step 7038: Survived [3] steps
Total reward [1.3749010562896729]
step 7042: Survived [3] steps
Total reward [1.3233287334442139]
step 7048: Survived [5] steps
Total reward [2.927552342414856]
step 7052: Survived [3] steps
Total reward [1.1793174743652344]
loss = 0.73960316
step 7056: Survived [3] steps
Total reward [1.3964641094207764]
step 7060: Survived [3] steps
Total reward [1.467395305633545]
step 7064: Survived [3] steps
Total reward [1.3612585067749023]
step 7068: Survived [3] steps
Total reward [1.252241611480713]
step 7074: Survived [5] steps
Total reward [2.8540111780166626]
step 7082: Survived [7] steps
Total reward [4.431638121604919]
step 7086: Survived [3] steps
Total reward [1.4648078680038452]
step 7088: Survived [1] steps
Total reward [-0.16525912284851074]
step 7092: Survived [3]

step 7539: Survived [3] steps
Total reward [1.4046378135681152]
step 7540: Survived [0] steps
Total reward [-1.0]
step 7545: Survived [4] steps
Total reward [2.224717855453491]
step 7547: Survived [1] steps
Total reward [-0.2470378875732422]
step 7554: Survived [6] steps
Total reward [3.9344184398651123]
step 7559: Survived [4] steps
Total reward [2.230248808860779]
loss = 0.44354516
step 7563: Survived [3] steps
Total reward [1.3770315647125244]
step 7565: Survived [1] steps
Total reward [-0.1752018928527832]
step 7571: Survived [5] steps
Total reward [3.061535358428955]
step 7577: Survived [5] steps
Total reward [3.017118453979492]
step 7580: Survived [2] steps
Total reward [0.6494369506835938]
step 7584: Survived [3] steps
Total reward [1.3899086713790894]
step 7589: Survived [4] steps
Total reward [2.252549409866333]
step 7595: Survived [5] steps
Total reward [3.057568073272705]
step 7599: Survived [3] steps
Total reward [1.2800832986831665]
step 7602: Survived [2] steps
Total rewa

step 8145: Survived [2] steps
Total reward [0.42182183265686035]
step 8150: Survived [4] steps
Total reward [1.7688329219818115]
step 8155: Survived [4] steps
Total reward [2.282720446586609]
step 8161: Survived [5] steps
Total reward [2.8927559852600098]
step 8164: Survived [2] steps
Total reward [0.42734575271606445]
step 8168: Survived [3] steps
Total reward [1.4204530715942383]
step 8172: Survived [3] steps
Total reward [0.7275713682174683]
loss = 0.06139397
step 8178: Survived [5] steps
Total reward [2.8447835445404053]
step 8184: Survived [5] steps
Total reward [2.8061641454696655]
step 8187: Survived [2] steps
Total reward [0.4746052026748657]
step 8190: Survived [2] steps
Total reward [0.1237940788269043]
step 8193: Survived [2] steps
Total reward [0.5538966655731201]
step 8198: Survived [4] steps
Total reward [2.166848063468933]
step 8202: Survived [3] steps
Total reward [1.4337239265441895]
step 8205: Survived [2] steps
Total reward [-0.05075562000274658]
step 8216: Survived 

step 8924: Survived [0] steps
Total reward [-1.0]
step 8928: Survived [3] steps
Total reward [1.4094839096069336]
step 8934: Survived [5] steps
Total reward [2.813414454460144]
step 8937: Survived [2] steps
Total reward [0.4953467845916748]
step 8942: Survived [4] steps
Total reward [1.9339970350265503]
step 8947: Survived [4] steps
Total reward [2.2106603384017944]
step 8955: Survived [7] steps
Total reward [4.792832732200623]
loss = 0.03937384
step 8962: Survived [6] steps
Total reward [3.5655118227005005]
step 8969: Survived [6] steps
Total reward [3.504468321800232]
step 8973: Survived [3] steps
Total reward [1.2707934379577637]
step 8979: Survived [5] steps
Total reward [2.878367781639099]
step 8983: Survived [3] steps
Total reward [1.318983793258667]
step 8989: Survived [5] steps
Total reward [2.799824357032776]
step 8996: Survived [6] steps
Total reward [3.5433682203292847]
Step [9000] -- Random [0.3081441143499318]
step 9004: Survived [7] steps
Total reward [4.390500068664551]


step 9623: Survived [5] steps
Total reward [2.98106586933136]
step 9631: Survived [7] steps
Total reward [4.393982887268066]
loss = 0.040021054
step 9635: Survived [3] steps
Total reward [1.4783384799957275]
step 9641: Survived [5] steps
Total reward [2.9016759395599365]
step 9643: Survived [1] steps
Total reward [-0.3004579544067383]
step 9646: Survived [2] steps
Total reward [0.5185689926147461]
step 9649: Survived [2] steps
Total reward [0.5491030216217041]
step 9655: Survived [5] steps
Total reward [2.856462836265564]
step 9659: Survived [3] steps
Total reward [1.243096113204956]
step 9665: Survived [5] steps
Total reward [3.042811870574951]
step 9670: Survived [4] steps
Total reward [2.221171736717224]
step 9677: Survived [6] steps
Total reward [3.6674455404281616]
step 9683: Survived [5] steps
Total reward [3.057362198829651]
loss = 0.14624459
step 9689: Survived [5] steps
Total reward [3.022346615791321]
step 9696: Survived [6] steps
Total reward [3.4820209741592407]
step 9701: 

step 10418: Survived [0] steps
Total reward [-1.0]
step 10427: Survived [8] steps
Total reward [5.503975033760071]
step 10430: Survived [2] steps
Total reward [0.489249587059021]
step 10444: Survived [13] steps
Total reward [9.267917275428772]
step 10455: Survived [10] steps
Total reward [6.695438027381897]
step 10464: Survived [8] steps
Total reward [5.416214942932129]
step 10466: Survived [1] steps
Total reward [-0.23494887351989746]
step 10471: Survived [4] steps
Total reward [2.275696039199829]
loss = 0.055784155
step 10473: Survived [1] steps
Total reward [-0.1966407299041748]
step 10488: Survived [14] steps
Total reward [9.449919939041138]
step 10493: Survived [4] steps
Total reward [2.076102137565613]
step 10496: Survived [2] steps
Total reward [0.4544004201889038]
step 10501: Survived [4] steps
Total reward [2.128824234008789]
step 10506: Survived [4] steps
Total reward [1.9950937032699585]
step 10510: Survived [3] steps
Total reward [1.3000891208648682]
step 10514: Survived [3

loss = 0.021637037
step 11149: Survived [12] steps
Total reward [7.985854864120483]
step 11153: Survived [3] steps
Total reward [1.3270622491836548]
step 11159: Survived [5] steps
Total reward [2.720005512237549]
step 11163: Survived [3] steps
Total reward [1.342576026916504]
step 11168: Survived [4] steps
Total reward [2.1954067945480347]
step 11172: Survived [3] steps
Total reward [1.3170502185821533]
step 11176: Survived [3] steps
Total reward [1.4018654823303223]
step 11180: Survived [3] steps
Total reward [1.4213308095932007]
step 11188: Survived [7] steps
Total reward [4.419190764427185]
step 11192: Survived [3] steps
Total reward [1.3225334882736206]
step 11196: Survived [3] steps
Total reward [1.459537148475647]
step 11199: Survived [2] steps
Total reward [0.6037520170211792]
loss = 0.011350443
step 11203: Survived [3] steps
Total reward [1.4542033672332764]
step 11211: Survived [7] steps
Total reward [4.431432604789734]
step 11214: Survived [2] steps
Total reward [0.5366772413

loss = 0.029258255
step 11987: Survived [5] steps
Total reward [2.6401562690734863]
step 11992: Survived [4] steps
Total reward [1.9498023986816406]
step 11993: Survived [0] steps
Total reward [-1.0]
step 11997: Survived [3] steps
Total reward [1.3239519596099854]
Step [12000] -- Random [0.20226643899168878]
step 12000: Survived [2] steps
Total reward [0.4492532014846802]
Successfully saved model at: saved_agent_DDDQN_no_opponent_20000/DDDQN.h5
step 12006: Survived [5] steps
Total reward [2.8211461305618286]
step 12013: Survived [6] steps
Total reward [3.619683265686035]
step 12019: Survived [5] steps
Total reward [2.7484188079833984]
step 12024: Survived [4] steps
Total reward [2.0373798608779907]
step 12025: Survived [0] steps
Total reward [-1.0]
step 12031: Survived [5] steps
Total reward [2.8129481077194214]
step 12037: Survived [5] steps
Total reward [2.8310123682022095]
loss = 0.058614556
step 12058: Survived [20] steps
Total reward [15.411989212036133]
step 12082: Survived [23] 

step 12986: Survived [18] steps
Total reward [12.660377740859985]
loss = 0.02525404
step 12995: Survived [8] steps
Total reward [5.3447312116622925]
Step [13000] -- Random [0.17206928502507693]
step 13047: Survived [51] steps
Total reward [39.45538556575775]
loss = 0.02825039
step 13056: Survived [8] steps
Total reward [5.456524014472961]
step 13074: Survived [17] steps
Total reward [12.421952962875366]
step 13081: Survived [6] steps
Total reward [3.922192692756653]
step 13084: Survived [2] steps
Total reward [0.5311965942382812]
loss = 0.013648618
step 13116: Survived [31] steps
Total reward [24.39645516872406]
step 13126: Survived [9] steps
Total reward [6.0782493352890015]
step 13127: Survived [0] steps
Total reward [-1.0]
step 13133: Survived [5] steps
Total reward [2.7513113021850586]
step 13138: Survived [4] steps
Total reward [2.175785779953003]
step 13144: Survived [5] steps
Total reward [2.9809746742248535]
step 13154: Survived [9] steps
Total reward [6.548145890235901]
step 1

step 14023: Survived [8] steps
Total reward [5.370548367500305]
step 14028: Survived [4] steps
Total reward [2.057470202445984]
step 14033: Survived [4] steps
Total reward [1.8040740489959717]
step 14055: Survived [21] steps
Total reward [14.92884647846222]
loss = 0.02631431
step 14061: Survived [5] steps
Total reward [2.7276625633239746]
step 14070: Survived [8] steps
Total reward [5.355183482170105]
step 14074: Survived [3] steps
Total reward [1.372827410697937]
step 14078: Survived [3] steps
Total reward [1.2575886249542236]
step 14084: Survived [5] steps
Total reward [2.772849202156067]
step 14090: Survived [5] steps
Total reward [2.619337320327759]
step 14097: Survived [6] steps
Total reward [3.602801561355591]
step 14098: Survived [0] steps
Total reward [-1.0]
loss = 0.014130779
step 14117: Survived [18] steps
Total reward [13.841280937194824]
step 14122: Survived [4] steps
Total reward [2.168637752532959]
step 14131: Survived [8] steps
Total reward [5.332444429397583]
step 14137

step 14970: Survived [2] steps
Total reward [0.5983835458755493]
step 14975: Survived [4] steps
Total reward [2.0301127433776855]
step 14981: Survived [5] steps
Total reward [2.8977932929992676]
step 14987: Survived [5] steps
Total reward [2.826159715652466]
Step [15000] -- Random [0.11737783083666044]
step 15001: Survived [13] steps
Total reward [9.76256537437439]
loss = 0.0155012645
step 15008: Survived [6] steps
Total reward [3.610480308532715]
step 15017: Survived [8] steps
Total reward [5.291070461273193]
step 15023: Survived [5] steps
Total reward [2.827407717704773]
step 15029: Survived [5] steps
Total reward [2.8008644580841064]
step 15037: Survived [7] steps
Total reward [4.192741394042969]
step 15043: Survived [5] steps
Total reward [2.85614550113678]
step 15051: Survived [7] steps
Total reward [4.118393182754517]
step 15055: Survived [3] steps
Total reward [1.2639673948287964]
step 15061: Survived [5] steps
Total reward [3.0082072019577026]
loss = 0.006000235
step 15067: Sur

127 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException AmbiguousAction InvalidRedispatching InvalidRedispatching('You cannot ask for a dispatch higher than pmax - pmin  [it would be always invalid because, even if the sepoint is pmin, this dispatch would set it to a number higher than pmax, which is impossible]. Invalid dispatch for generator(s): [1]',)], 'rewards': {}}
127 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': 

129 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException AmbiguousAction InvalidRedispatching InvalidRedispatching('You cannot ask for a dispatch lower than pmin - pmax  [it would be always invalid because, even if the sepoint is pmax, this dispatch would set it to a number bellow pmin, which is impossible]. Invalid dispatch for generator(s): [1]',)], 'rewards': {}}
129 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': False,

step 16237: Survived [19] steps
Total reward [13.591951251029968]
loss = 0.0010859963
step 16246: Survived [8] steps
Total reward [5.232967138290405]
step 16250: Survived [3] steps
Total reward [1.179023027420044]
step 16266: Survived [15] steps
Total reward [10.338696360588074]
step 16292: Survived [25] steps
Total reward [17.92546033859253]
step 16295: Survived [2] steps
Total reward [0.4298062324523926]
loss = 0.0013069786
5 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': True, 'is_ambiguous': False, 'is_dispatching_illegal': False, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException IllegalAction IllegalAction('Powerline with ids [4] have been modified illegally (cooldown)',)], 'rewards': {}}
step 16312: Survived [16] steps
Total reward [10.113874

118 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException AmbiguousAction InvalidRedispatching InvalidRedispatching('You cannot ask for a dispatch higher than pmax - pmin  [it would be always invalid because, even if the sepoint is pmin, this dispatch would set it to a number higher than pmax, which is impossible]. Invalid dispatch for generator(s): [0]',)], 'rewards': {}}
118 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': 

117 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException AmbiguousAction InvalidRedispatching InvalidRedispatching('You cannot ask for a dispatch higher than pmax - pmin  [it would be always invalid because, even if the sepoint is pmin, this dispatch would set it to a number higher than pmax, which is impossible]. Invalid dispatch for generator(s): [0]',)], 'rewards': {}}
loss = 0.0012755056
117 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True,

117 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException AmbiguousAction InvalidRedispatching InvalidRedispatching('You cannot ask for a dispatch higher than pmax - pmin  [it would be always invalid because, even if the sepoint is pmin, this dispatch would set it to a number higher than pmax, which is impossible]. Invalid dispatch for generator(s): [0]',)], 'rewards': {}}
117 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': 

118 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': False, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException AmbiguousAction InvalidRedispatching InvalidRedispatching('You cannot ask for a dispatch higher than pmax - pmin  [it would be always invalid because, even if the sepoint is pmin, this dispatch would set it to a number higher than pmax, which is impossible]. Invalid dispatch for generator(s): [0]',)], 'rewards': {}}
118 {'disc_lines': array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), 'is_illegal': False, 'is_ambiguous': False, 'is_dispatching_illegal': True, 'is_illegal_reco': 