In [1]:
#!/usr/bin/env python
from __future__ import print_function


import os
##disable graphics for cloud computing
os.putenv('SDL_VIDEODRIVER', 'fbcon')
os.environ["SDL_VIDEODRIVER"] = "dummy"
import argparse
import skimage as skimage
from skimage import transform, color, exposure
from skimage.transform import rotate
from skimage.viewer import ImageViewer
import sys
sys.path.append("game/")
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque

  warn("Recommended matplotlib backend is `Agg` for full "


In [2]:

import json
from keras import initializers
from keras.initializers import normal, identity
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD , Adam
import tensorflow as tf

Using TensorFlow backend.


In [8]:
GAME = 'bird' # the name of the game being played for log files
CONFIG = 'nothreshold'
ACTIONS = 2 # number of valid actions
GAMMA = 0.99 # decay rate of past observations
OBSERVATION = 3200. # timesteps to observe before training
EXPLORE = 3000000. # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 1.0#0.1 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
FRAME_PER_ACTION = 1
LEARNING_RATE = 1e-4

In [4]:
img_rows , img_cols = 80, 80
#Convert image into Black and white
img_channels = 4 #We stack 4 frames

def buildmodel():
    print("Now we build the model")
    model = Sequential()
    model.add(Convolution2D(32, 8, 8, subsample=(4, 4), border_mode='same',input_shape=(img_rows,img_cols,img_channels)))  #80*80*4
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 4, 4, subsample=(2, 2), border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3, subsample=(1, 1), border_mode='same'))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(2))
   
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    print("We finish building the model")
    return model

In [9]:
def trainNetwork(model,args='train'):
    # open up a game state to communicate with emulator
    game_state = game.GameState()

    # store the previous observations in replay memory
    D = deque()

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, terminal = game_state.frame_step(do_nothing)

    x_t = skimage.color.rgb2gray(x_t)
    x_t = skimage.transform.resize(x_t,(80,80))
    x_t = skimage.exposure.rescale_intensity(x_t,out_range=(0,255))

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    #print (s_t.shape)

    #In Keras, need to reshape
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  #1*80*80*4

    

    if args == 'Run':
        print('running with saved model')
        OBSERVE = 999999999    #We keep observe, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")    
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        epsilon = INITIAL_EPSILON

    t = 0
    while (True):
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0
        a_t = np.zeros([ACTIONS])
        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print("----------Random Action----------")
                if random.random() <= 0.95:
                    action_index=0
                else:
                    action_index=1
                #action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else:
                q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[max_Q] = 1

        #We reduced the epsilon gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        #run the selected action and observed next state and reward
        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)

        x_t1 = skimage.color.rgb2gray(x_t1_colored)
        x_t1 = skimage.transform.resize(x_t1,(80,80))
        x_t1 = skimage.exposure.rescale_intensity(x_t1, out_range=(0, 255))

        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) #1x80x80x1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #only train if done observing
        if t > OBSERVE:
            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)



            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))   #32, 80, 80, 4
            print (inputs.shape)
            targets = np.zeros((inputs.shape[0], ACTIONS))                         #32, 2

            #Now we do the experience replay
            for i in range(0, len(minibatch)):
                state_t = minibatch[i][0]
                action_t = minibatch[i][1]   #This is action index
                reward_t = minibatch[i][2]
                state_t1 = minibatch[i][3]
                terminal = minibatch[i][4]
                # if terminated, only equals reward

                inputs[i:i + 1] = state_t    #I saved down s_t

                targets[i] = model.predict(state_t)  # Hitting each buttom probability
                Q_sa = model.predict(state_t1)

                if terminal:
                    targets[i, action_t] = reward_t
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            # targets2 = normalize(targets)
            loss += model.train_on_batch(inputs, targets)

        s_t = s_t1
        t = t + 1

        # save progress every 10000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            model.save_weights("model.h5", overwrite=True)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, \
            "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
            "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)

    print("Episode finished!")
    print("************************")

In [10]:
def playGame(args='train'):
    model = buildmodel()
    trainNetwork(model,args)

def main():
    parser = argparse.ArgumentParser(description='Description of your program')
    parser.add_argument('-m','--mode', help='Train / Run', required=True)
    args = vars(parser.parse_args())
    playGame(args)

In [11]:
playGame()

Now we build the model


  
  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':


We finish building the model
----------Random Action----------
TIMESTEP 1 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 4 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 5 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 6 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 7 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 8 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------R

TIMESTEP 74 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 75 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 76 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 77 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 78 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 79 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 80 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 81 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 82 / STATE observe / EP

TIMESTEP 144 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 145 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 146 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 147 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 148 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 149 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD -1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 150 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 151 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 152 / STATE obse

TIMESTEP 213 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 214 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 215 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 216 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 217 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 218 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 219 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 220 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 221 / STATE obs

TIMESTEP 282 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 283 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 284 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 285 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 286 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 287 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 288 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 289 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 290 / STATE obs

TIMESTEP 350 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 351 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 352 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 353 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 354 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 355 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 356 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 357 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 358 / STATE obs

TIMESTEP 420 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 421 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 422 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 423 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 424 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 425 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 426 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 427 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 428 / STATE obs

TIMESTEP 493 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 494 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 495 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 496 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 497 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 498 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 499 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD -1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 500 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 501 / STATE obse

TIMESTEP 568 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 569 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 570 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 571 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 572 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 573 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 574 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 575 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 576 / STATE obs

TIMESTEP 636 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 637 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 638 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 639 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 640 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 641 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 642 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 643 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 644 / STATE obs

TIMESTEP 706 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 707 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 708 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 709 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 710 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 711 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 712 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 713 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 714 / STATE obs

TIMESTEP 774 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 775 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 776 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 777 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 778 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 779 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 780 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 781 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 782 / STATE obs

TIMESTEP 847 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 848 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 849 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD -1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 850 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 851 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 852 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 853 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 854 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 855 / STATE obse

TIMESTEP 916 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 917 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 918 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 919 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 920 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 921 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 922 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 923 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 924 / STATE obs

TIMESTEP 985 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 986 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 987 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 988 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 989 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 990 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 991 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 992 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 993 / STATE obs

TIMESTEP 1053 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1054 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1055 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1056 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1057 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1058 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1059 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1060 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1061 / 

TIMESTEP 1122 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1123 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1124 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1125 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1126 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1127 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1128 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1129 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1130 / 

TIMESTEP 1191 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1192 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1193 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1194 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1195 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1196 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1197 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1198 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1199 / 

TIMESTEP 1259 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1260 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1261 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1262 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1263 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1264 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1265 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1266 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1267 / 

TIMESTEP 1329 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1330 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1331 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1332 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1333 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1334 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1335 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1336 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1337 / 

TIMESTEP 1398 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1399 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD -1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1400 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1401 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1402 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1403 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1404 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1405 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1406 / S

TIMESTEP 1465 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1466 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1467 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1468 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1469 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1470 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1471 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1472 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1473 / 

TIMESTEP 1534 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1535 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1536 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1537 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1538 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1539 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1540 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1541 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1542 / 

TIMESTEP 1603 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1604 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1605 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1606 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1607 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1608 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1609 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1610 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1611 / 

TIMESTEP 1672 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1673 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1674 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1675 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1676 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1677 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1678 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1679 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1680 / 

TIMESTEP 1740 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1741 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1742 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1743 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1744 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1745 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1746 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1747 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1748 / 

TIMESTEP 1810 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1811 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1812 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1813 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1814 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1815 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1816 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1817 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1818 / 

TIMESTEP 1878 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1879 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1880 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1881 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1882 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1883 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1884 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1885 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1886 / 

TIMESTEP 1948 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1949 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD -1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1950 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1951 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1952 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1953 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1954 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1955 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 1956 / S

TIMESTEP 2015 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2016 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2017 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2018 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2019 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2020 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2021 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2022 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2023 / 

TIMESTEP 2085 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2086 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2087 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2088 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2089 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2090 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2091 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2092 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2093 / 

TIMESTEP 2152 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2153 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2154 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2155 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2156 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2157 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2158 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2159 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2160 / 

TIMESTEP 2220 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2221 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2222 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2223 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2224 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2225 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2226 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2227 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2228 / 

TIMESTEP 2289 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2290 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2291 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2292 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2293 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2294 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2295 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2296 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2297 / 

TIMESTEP 2359 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2360 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2361 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2362 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2363 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2364 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2365 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2366 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2367 / 

TIMESTEP 2427 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2428 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2429 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2430 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2431 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2432 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2433 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2434 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2435 / 

TIMESTEP 2495 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2496 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2497 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2498 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2499 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD -1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2500 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2501 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2502 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2503 / S

TIMESTEP 2564 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2565 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2566 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2567 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2568 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2569 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2570 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2571 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2572 / 

TIMESTEP 2634 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2635 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2636 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2637 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2638 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2639 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2640 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2641 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2642 / 

TIMESTEP 2702 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2703 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2704 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2705 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2706 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2707 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2708 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2709 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2710 / 

TIMESTEP 2771 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2772 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2773 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2774 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2775 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2776 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2777 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2778 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2779 / 

TIMESTEP 2841 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2842 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2843 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2844 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2845 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2846 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2847 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2848 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2849 / 

TIMESTEP 2911 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2912 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2913 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2914 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2915 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2916 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2917 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2918 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2919 / 

TIMESTEP 2979 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2980 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2981 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2982 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2983 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2984 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2985 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2986 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 2987 / 

TIMESTEP 3048 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3049 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD -1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3050 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3051 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3052 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3053 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3054 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3055 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3056 / S

TIMESTEP 3116 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3117 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3118 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3119 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3120 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3121 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3122 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3123 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3124 / 

TIMESTEP 3183 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3184 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3185 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3186 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3187 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3188 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3189 / STATE observe / EPSILON 1.0 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3190 / STATE observe / EPSILON 1.0 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
TIMESTEP 3191 / 

TIMESTEP 3241 / STATE explore / EPSILON 0.999986668 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3242 / STATE explore / EPSILON 0.9999863347 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3243 / STATE explore / EPSILON 0.9999860014 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3244 / STATE explore / EPSILON 0.9999856681 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3245 / STATE explore / EPSILON 0.9999853348 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3246 / STATE explore / EPSILON 0.9999850015 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3247 / STATE explore / EPSILON 0.9999846682 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3295 / STATE explore / EPSILON 0.9999686698 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3296 / STATE explore / EPSILON 0.9999683365 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3297 / STATE explore / EPSILON 0.9999680032 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3298 / STATE explore / EPSILON 0.9999676699 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3299 / STATE explore / EPSILON 0.9999673366 / ACTION 1 / REWARD -1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3300 / STATE explore / EPSILON 0.9999670033 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3301 / STATE explore / EPSILON 0.99996667 / ACTION 1 / REWARD 0.1 / Q_MAX  nan /

TIMESTEP 3349 / STATE explore / EPSILON 0.9999506716 / ACTION 1 / REWARD -1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3350 / STATE explore / EPSILON 0.9999503383 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3351 / STATE explore / EPSILON 0.999950005 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3352 / STATE explore / EPSILON 0.9999496717 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3353 / STATE explore / EPSILON 0.9999493384 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3354 / STATE explore / EPSILON 0.9999490051 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3355 / STATE explore / EPSILON 0.9999486718 / ACTION 1 / REWARD 0.1 / Q_MAX  nan 

TIMESTEP 3403 / STATE explore / EPSILON 0.9999326734 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3404 / STATE explore / EPSILON 0.9999323401 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3405 / STATE explore / EPSILON 0.9999320068 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3406 / STATE explore / EPSILON 0.9999316735 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3407 / STATE explore / EPSILON 0.9999313402 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3408 / STATE explore / EPSILON 0.9999310069 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3409 / STATE explore / EPSILON 0.9999306736 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 3457 / STATE explore / EPSILON 0.9999146752 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3458 / STATE explore / EPSILON 0.9999143419 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3459 / STATE explore / EPSILON 0.9999140086 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3460 / STATE explore / EPSILON 0.9999136753 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3461 / STATE explore / EPSILON 0.999913342 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3462 / STATE explore / EPSILON 0.9999130087 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3463 / STATE explore / EPSILON 0.9999126754 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3511 / STATE explore / EPSILON 0.999896677 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3512 / STATE explore / EPSILON 0.9998963437 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3513 / STATE explore / EPSILON 0.9998960104 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3514 / STATE explore / EPSILON 0.9998956771 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3515 / STATE explore / EPSILON 0.9998953438 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3516 / STATE explore / EPSILON 0.9998950105 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3517 / STATE explore / EPSILON 0.9998946772 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3565 / STATE explore / EPSILON 0.9998786788 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3566 / STATE explore / EPSILON 0.9998783455 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3567 / STATE explore / EPSILON 0.9998780122 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3568 / STATE explore / EPSILON 0.9998776789 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3569 / STATE explore / EPSILON 0.9998773456 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3570 / STATE explore / EPSILON 0.9998770123 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3571 / STATE explore / EPSILON 0.999876679 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3619 / STATE explore / EPSILON 0.9998606806 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3620 / STATE explore / EPSILON 0.9998603473 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3621 / STATE explore / EPSILON 0.999860014 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3622 / STATE explore / EPSILON 0.9998596807 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3623 / STATE explore / EPSILON 0.9998593474 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3624 / STATE explore / EPSILON 0.9998590141 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3625 / STATE explore / EPSILON 0.9998586808 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3673 / STATE explore / EPSILON 0.9998426824 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3674 / STATE explore / EPSILON 0.9998423491 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3675 / STATE explore / EPSILON 0.9998420158 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3676 / STATE explore / EPSILON 0.9998416825 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3677 / STATE explore / EPSILON 0.9998413492 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3678 / STATE explore / EPSILON 0.9998410159 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3679 / STATE explore / EPSILON 0.9998406826 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 3727 / STATE explore / EPSILON 0.9998246842 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3728 / STATE explore / EPSILON 0.9998243509 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3729 / STATE explore / EPSILON 0.9998240176 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3730 / STATE explore / EPSILON 0.9998236843 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3731 / STATE explore / EPSILON 0.999823351 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3732 / STATE explore / EPSILON 0.9998230177 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3733 / STATE explore / EPSILON 0.9998226844 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3781 / STATE explore / EPSILON 0.999806686 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3782 / STATE explore / EPSILON 0.9998063527 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3783 / STATE explore / EPSILON 0.9998060194 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3784 / STATE explore / EPSILON 0.9998056861 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3785 / STATE explore / EPSILON 0.9998053528 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3786 / STATE explore / EPSILON 0.9998050195 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3787 / STATE explore / EPSILON 0.9998046862 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3835 / STATE explore / EPSILON 0.9997886878 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3836 / STATE explore / EPSILON 0.9997883545 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3837 / STATE explore / EPSILON 0.9997880212 / ACTION 0 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3838 / STATE explore / EPSILON 0.9997876879 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3839 / STATE explore / EPSILON 0.9997873546 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3840 / STATE explore / EPSILON 0.9997870213 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3841 / STATE explore / EPSILON 0.999786688 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3889 / STATE explore / EPSILON 0.9997706896 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3890 / STATE explore / EPSILON 0.9997703563 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3891 / STATE explore / EPSILON 0.999770023 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3892 / STATE explore / EPSILON 0.9997696897 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3893 / STATE explore / EPSILON 0.9997693564 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3894 / STATE explore / EPSILON 0.9997690231 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3895 / STATE explore / EPSILON 0.9997686898 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 3943 / STATE explore / EPSILON 0.9997526914 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3944 / STATE explore / EPSILON 0.9997523581 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3945 / STATE explore / EPSILON 0.9997520248 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3946 / STATE explore / EPSILON 0.9997516915 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3947 / STATE explore / EPSILON 0.9997513582 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3948 / STATE explore / EPSILON 0.9997510249 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3949 / STATE explore / EPSILON 0.9997506916 / ACTION 1 / REWARD -1 / Q_MAX  nan

TIMESTEP 3997 / STATE explore / EPSILON 0.9997346932 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3998 / STATE explore / EPSILON 0.9997343599 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 3999 / STATE explore / EPSILON 0.9997340266 / ACTION 1 / REWARD -1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
Now we save model
TIMESTEP 4000 / STATE explore / EPSILON 0.9997336933 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4001 / STATE explore / EPSILON 0.99973336 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4002 / STATE explore / EPSILON 0.9997330267 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4003 / STATE explore / EPSILON 0.9997326934 / ACTION 1 / REWARD 

TIMESTEP 4051 / STATE explore / EPSILON 0.999716695 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4052 / STATE explore / EPSILON 0.9997163617 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4053 / STATE explore / EPSILON 0.9997160284 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4054 / STATE explore / EPSILON 0.9997156951 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4055 / STATE explore / EPSILON 0.9997153618 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4056 / STATE explore / EPSILON 0.9997150285 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4057 / STATE explore / EPSILON 0.9997146952 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4105 / STATE explore / EPSILON 0.9996986968 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4106 / STATE explore / EPSILON 0.9996983635 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4107 / STATE explore / EPSILON 0.9996980302 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4108 / STATE explore / EPSILON 0.9996976969 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4109 / STATE explore / EPSILON 0.9996973636 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4110 / STATE explore / EPSILON 0.9996970303 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4111 / STATE explore / EPSILON 0.999696697 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4159 / STATE explore / EPSILON 0.9996806986 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4160 / STATE explore / EPSILON 0.9996803653 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4161 / STATE explore / EPSILON 0.999680032 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4162 / STATE explore / EPSILON 0.9996796987 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4163 / STATE explore / EPSILON 0.9996793654 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4164 / STATE explore / EPSILON 0.9996790321 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4165 / STATE explore / EPSILON 0.9996786988 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4213 / STATE explore / EPSILON 0.9996627004 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4214 / STATE explore / EPSILON 0.9996623671 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4215 / STATE explore / EPSILON 0.9996620338 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4216 / STATE explore / EPSILON 0.9996617005 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4217 / STATE explore / EPSILON 0.9996613672 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4218 / STATE explore / EPSILON 0.9996610339 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4219 / STATE explore / EPSILON 0.9996607006 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 4267 / STATE explore / EPSILON 0.9996447022 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4268 / STATE explore / EPSILON 0.9996443689 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4269 / STATE explore / EPSILON 0.9996440356 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4270 / STATE explore / EPSILON 0.9996437023 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4271 / STATE explore / EPSILON 0.999643369 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4272 / STATE explore / EPSILON 0.9996430357 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4273 / STATE explore / EPSILON 0.9996427024 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4321 / STATE explore / EPSILON 0.999626704 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4322 / STATE explore / EPSILON 0.9996263707 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4323 / STATE explore / EPSILON 0.9996260374 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4324 / STATE explore / EPSILON 0.9996257041 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4325 / STATE explore / EPSILON 0.9996253708 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4326 / STATE explore / EPSILON 0.9996250375 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4327 / STATE explore / EPSILON 0.9996247042 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4375 / STATE explore / EPSILON 0.9996087058 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4376 / STATE explore / EPSILON 0.9996083725 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4377 / STATE explore / EPSILON 0.9996080392 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4378 / STATE explore / EPSILON 0.9996077059 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4379 / STATE explore / EPSILON 0.9996073726 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4380 / STATE explore / EPSILON 0.9996070393 / ACTION 0 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4381 / STATE explore / EPSILON 0.999606706 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4429 / STATE explore / EPSILON 0.9995907076 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4430 / STATE explore / EPSILON 0.9995903743 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4431 / STATE explore / EPSILON 0.999590041 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4432 / STATE explore / EPSILON 0.9995897077 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4433 / STATE explore / EPSILON 0.9995893744 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4434 / STATE explore / EPSILON 0.9995890411 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4435 / STATE explore / EPSILON 0.9995887078 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4483 / STATE explore / EPSILON 0.9995727094 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4484 / STATE explore / EPSILON 0.9995723761 / ACTION 0 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4485 / STATE explore / EPSILON 0.9995720428 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4486 / STATE explore / EPSILON 0.9995717095 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4487 / STATE explore / EPSILON 0.9995713762 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4488 / STATE explore / EPSILON 0.9995710429 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4489 / STATE explore / EPSILON 0.9995707096 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 4537 / STATE explore / EPSILON 0.9995547112 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4538 / STATE explore / EPSILON 0.9995543779 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4539 / STATE explore / EPSILON 0.9995540446 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4540 / STATE explore / EPSILON 0.9995537113 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4541 / STATE explore / EPSILON 0.999553378 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4542 / STATE explore / EPSILON 0.9995530447 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4543 / STATE explore / EPSILON 0.9995527114 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4591 / STATE explore / EPSILON 0.999536713 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4592 / STATE explore / EPSILON 0.9995363797 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4593 / STATE explore / EPSILON 0.9995360464 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4594 / STATE explore / EPSILON 0.9995357131 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4595 / STATE explore / EPSILON 0.9995353798 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4596 / STATE explore / EPSILON 0.9995350465 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4597 / STATE explore / EPSILON 0.9995347132 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4645 / STATE explore / EPSILON 0.9995187148 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4646 / STATE explore / EPSILON 0.9995183815 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4647 / STATE explore / EPSILON 0.9995180482 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4648 / STATE explore / EPSILON 0.9995177149 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4649 / STATE explore / EPSILON 0.9995173816 / ACTION 1 / REWARD -1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4650 / STATE explore / EPSILON 0.9995170483 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4651 / STATE explore / EPSILON 0.999516715 / ACTION 1 / REWARD 0.1 / Q_MAX  nan 

TIMESTEP 4700 / STATE explore / EPSILON 0.9995003833 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4701 / STATE explore / EPSILON 0.99950005 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4702 / STATE explore / EPSILON 0.9994997167 / ACTION 0 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4703 / STATE explore / EPSILON 0.9994993834 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4704 / STATE explore / EPSILON 0.9994990501 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4705 / STATE explore / EPSILON 0.9994987168 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4706 / STATE explore / EPSILON 0.9994983835 / ACTION 1 / REWARD 0.1 / Q_MAX  nan 

TIMESTEP 4754 / STATE explore / EPSILON 0.9994823851 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4755 / STATE explore / EPSILON 0.9994820518 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4756 / STATE explore / EPSILON 0.9994817185 / ACTION 0 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4757 / STATE explore / EPSILON 0.9994813852 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4758 / STATE explore / EPSILON 0.9994810519 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4759 / STATE explore / EPSILON 0.9994807186 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4760 / STATE explore / EPSILON 0.9994803853 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 4808 / STATE explore / EPSILON 0.9994643869 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4809 / STATE explore / EPSILON 0.9994640536 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4810 / STATE explore / EPSILON 0.9994637203 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4811 / STATE explore / EPSILON 0.999463387 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4812 / STATE explore / EPSILON 0.9994630537 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4813 / STATE explore / EPSILON 0.9994627204 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4814 / STATE explore / EPSILON 0.9994623871 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4862 / STATE explore / EPSILON 0.9994463887 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4863 / STATE explore / EPSILON 0.9994460554 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4864 / STATE explore / EPSILON 0.9994457221 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4865 / STATE explore / EPSILON 0.9994453888 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4866 / STATE explore / EPSILON 0.9994450555 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4867 / STATE explore / EPSILON 0.9994447222 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4868 / STATE explore / EPSILON 0.9994443889 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 4916 / STATE explore / EPSILON 0.9994283905 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4917 / STATE explore / EPSILON 0.9994280572 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4918 / STATE explore / EPSILON 0.9994277239 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4919 / STATE explore / EPSILON 0.9994273906 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4920 / STATE explore / EPSILON 0.9994270573 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4921 / STATE explore / EPSILON 0.999426724 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4922 / STATE explore / EPSILON 0.9994263907 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 4970 / STATE explore / EPSILON 0.9994103923 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4971 / STATE explore / EPSILON 0.999410059 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4972 / STATE explore / EPSILON 0.9994097257 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4973 / STATE explore / EPSILON 0.9994093924 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4974 / STATE explore / EPSILON 0.9994090591 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4975 / STATE explore / EPSILON 0.9994087258 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 4976 / STATE explore / EPSILON 0.9994083925 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 5024 / STATE explore / EPSILON 0.9993923941 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5025 / STATE explore / EPSILON 0.9993920608 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5026 / STATE explore / EPSILON 0.9993917275 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5027 / STATE explore / EPSILON 0.9993913942 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5028 / STATE explore / EPSILON 0.9993910609 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5029 / STATE explore / EPSILON 0.9993907276 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5030 / STATE explore / EPSILON 0.9993903943 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 5078 / STATE explore / EPSILON 0.9993743959 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5079 / STATE explore / EPSILON 0.9993740626 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5080 / STATE explore / EPSILON 0.9993737293 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5081 / STATE explore / EPSILON 0.999373396 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5082 / STATE explore / EPSILON 0.9993730627 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5083 / STATE explore / EPSILON 0.9993727294 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5084 / STATE explore / EPSILON 0.9993723961 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 5132 / STATE explore / EPSILON 0.9993563977 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5133 / STATE explore / EPSILON 0.9993560644 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5134 / STATE explore / EPSILON 0.9993557311 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5135 / STATE explore / EPSILON 0.9993553978 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5136 / STATE explore / EPSILON 0.9993550645 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5137 / STATE explore / EPSILON 0.9993547312 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5138 / STATE explore / EPSILON 0.9993543979 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 5186 / STATE explore / EPSILON 0.9993383995 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5187 / STATE explore / EPSILON 0.9993380662 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5188 / STATE explore / EPSILON 0.9993377329 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5189 / STATE explore / EPSILON 0.9993373996 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5190 / STATE explore / EPSILON 0.9993370663 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5191 / STATE explore / EPSILON 0.999336733 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5192 / STATE explore / EPSILON 0.9993363997 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 5240 / STATE explore / EPSILON 0.9993204013 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5241 / STATE explore / EPSILON 0.999320068 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5242 / STATE explore / EPSILON 0.9993197347 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5243 / STATE explore / EPSILON 0.9993194014 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5244 / STATE explore / EPSILON 0.9993190681 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5245 / STATE explore / EPSILON 0.9993187348 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5246 / STATE explore / EPSILON 0.9993184015 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 5294 / STATE explore / EPSILON 0.9993024031 / ACTION 0 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5295 / STATE explore / EPSILON 0.9993020698 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5296 / STATE explore / EPSILON 0.9993017365 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5297 / STATE explore / EPSILON 0.9993014032 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5298 / STATE explore / EPSILON 0.9993010699 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5299 / STATE explore / EPSILON 0.9993007366 / ACTION 1 / REWARD -1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5300 / STATE explore / EPSILON 0.9993004033 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 5348 / STATE explore / EPSILON 0.9992844049 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5349 / STATE explore / EPSILON 0.9992840716 / ACTION 1 / REWARD -1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5350 / STATE explore / EPSILON 0.9992837383 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5351 / STATE explore / EPSILON 0.999283405 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5352 / STATE explore / EPSILON 0.9992830717 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5353 / STATE explore / EPSILON 0.9992827384 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5354 / STATE explore / EPSILON 0.9992824051 / ACTION 1 / REWARD 0.1 / Q_MAX  nan 

TIMESTEP 5402 / STATE explore / EPSILON 0.9992664067 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5403 / STATE explore / EPSILON 0.9992660734 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5404 / STATE explore / EPSILON 0.9992657401 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5405 / STATE explore / EPSILON 0.9992654068 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5406 / STATE explore / EPSILON 0.9992650735 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5407 / STATE explore / EPSILON 0.9992647402 / ACTION 0 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5408 / STATE explore / EPSILON 0.9992644069 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 5456 / STATE explore / EPSILON 0.9992484085 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5457 / STATE explore / EPSILON 0.9992480752 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5458 / STATE explore / EPSILON 0.9992477419 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5459 / STATE explore / EPSILON 0.9992474086 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5460 / STATE explore / EPSILON 0.9992470753 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5461 / STATE explore / EPSILON 0.999246742 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5462 / STATE explore / EPSILON 0.9992464087 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 5510 / STATE explore / EPSILON 0.9992304103 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5511 / STATE explore / EPSILON 0.999230077 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5512 / STATE explore / EPSILON 0.9992297437 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5513 / STATE explore / EPSILON 0.9992294104 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5514 / STATE explore / EPSILON 0.9992290771 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5515 / STATE explore / EPSILON 0.9992287438 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5516 / STATE explore / EPSILON 0.9992284105 / ACTION 1 / REWARD 0.1 / Q_MAX  nan

TIMESTEP 5564 / STATE explore / EPSILON 0.9992124121 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5565 / STATE explore / EPSILON 0.9992120788 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5566 / STATE explore / EPSILON 0.9992117455 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5567 / STATE explore / EPSILON 0.9992114122 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5568 / STATE explore / EPSILON 0.9992110789 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5569 / STATE explore / EPSILON 0.9992107456 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5570 / STATE explore / EPSILON 0.9992104123 / ACTION 1 / REWARD 0.1 / Q_MAX  na

TIMESTEP 5618 / STATE explore / EPSILON 0.9991944139 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5619 / STATE explore / EPSILON 0.9991940806 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5620 / STATE explore / EPSILON 0.9991937473 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5621 / STATE explore / EPSILON 0.999193414 / ACTION 1 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)
TIMESTEP 5622 / STATE explore / EPSILON 0.9991930807 / ACTION 0 / REWARD 0.1 / Q_MAX  nan / Loss  nan
----------Random Action----------
(32, 80, 80, 4)


KeyboardInterrupt: 

In [28]:
playGame(args='Run')

Now we build the model
We finish building the model

  
  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':



running with saved model
Now we load weight
Weight load successfully
TIMESTEP 1 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 2 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 3 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 4 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 5 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 6 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 7 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 8 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 9 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 10 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 11 / STATE observe / EPSILON 0

TIMESTEP 95 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 96 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 97 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 98 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 99 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 100 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 101 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 102 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 103 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 104 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 105 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIME

TIMESTEP 187 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 188 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 189 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 190 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 191 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 192 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 193 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 194 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 195 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 196 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 197 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0


TIMESTEP 279 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 280 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 281 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 282 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 283 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 284 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 285 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 286 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 287 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 288 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 289 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0


TIMESTEP 370 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 371 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 372 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 373 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 374 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 375 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 376 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 377 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 378 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 379 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 380 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0


TIMESTEP 462 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 463 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 464 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 465 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 466 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 467 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 468 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 469 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 470 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 471 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 472 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0

TIMESTEP 556 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 557 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 558 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 559 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 560 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 561 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 562 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 563 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 564 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 565 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 566 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0

TIMESTEP 647 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 648 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 649 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 650 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 651 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 652 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 653 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 654 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 655 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 656 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 657 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0

TIMESTEP 739 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 740 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 741 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 742 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 743 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 744 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 745 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 746 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 747 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 748 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 749 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0


TIMESTEP 830 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 831 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 832 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 833 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 834 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 835 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 836 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 837 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 838 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 839 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 840 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0


TIMESTEP 923 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 924 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 925 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 926 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 927 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 928 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 929 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 930 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 931 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 932 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 933 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0


TIMESTEP 1015 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1016 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1017 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1018 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1019 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1020 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1021 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1022 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1023 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1024 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1025 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0

TIMESTEP 1108 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1109 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1110 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1111 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1112 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1113 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1114 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1115 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1116 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1117 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1118 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  

TIMESTEP 1199 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1200 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1201 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1202 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1203 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1204 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1205 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1206 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1207 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1208 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1209 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  

TIMESTEP 1291 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 1292 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1293 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1294 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1295 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1296 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1297 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1298 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1299 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1300 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1301 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0

TIMESTEP 1384 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1385 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1386 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 1387 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1388 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1389 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1390 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1391 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1392 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1393 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1394 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0

TIMESTEP 1477 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1478 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1479 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1480 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1481 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 1482 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1483 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1484 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1485 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1486 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1487 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0

TIMESTEP 1568 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1569 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1570 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1571 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1572 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1573 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1574 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1575 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1576 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD -1 / Q_MAX  0 / Loss  0
TIMESTEP 1577 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1578 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0

TIMESTEP 1659 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1660 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1661 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1662 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1663 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1664 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1665 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1666 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1667 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1668 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1669 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  

TIMESTEP 1751 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1752 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1753 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1754 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1755 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1756 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1757 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1758 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1759 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1760 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
TIMESTEP 1761 / STATE observe / EPSILON 0.0001 / ACTION 0 / REWARD 0.1 / Q_MAX  

KeyboardInterrupt: 

In [7]:


if __name__ == "__main__":
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)
    main()


usage: ipykernel_launcher.py [-h] -m MODE
ipykernel_launcher.py: error: argument -m/--mode is required


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
