In [1]:
#!/usr/bin/env python
import pygame
from __future__ import print_function
import os
os.environ["CUDA_VISIVBLE_DEVICES"] = '0'
import tensorflow as tf
import cv2
import sys
sys.path.append("game/")
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
print('tensorflow version: ',tf.__version__)


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14193368364188291436
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 11453451469
locality {
  bus_id: 1
}
incarnation: 4578169034322647405
physical_device_desc: "device: 0, name: TITAN Xp, pci bus id: 0000:05:00.0, compute capability: 6.1"
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 11970700903
locality {
  bus_id: 1
}
incarnation: 16601661021934340058
physical_device_desc: "device: 1, name: TITAN Xp, pci bus id: 0000:06:00.0, compute capability: 6.1"
, name: "/device:GPU:2"
device_type: "GPU"
memory_limit: 11970700903
locality {
  bus_id: 1
}
incarnation: 15922401563597657872
physical_device_desc: "device: 2, name: TITAN Xp, pci bus id: 0000:09:00.0, compute capability: 6.1"
]
tensorflow version:  1.4.0


In [2]:
import pygame
from __future__ import print_function
import numpy as np
from collections import deque
import cv2

In [3]:
GAME = 'bird' # the name of the game being played for log files
ACTIONS = 2 # number of valid actions
GAMMA = 0.99 # decay rate of past observations
OBSERVE = 10000. # timesteps to observe before training
EXPLORE = 3000000. # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.0001 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
FRAME_PER_ACTION = 1

In [4]:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev = 0.01)
    return tf.Variable(initial)

In [5]:
def bias_variable(shape):
    initial = tf.constant(0.01, shape = shape)
    return tf.Variable(initial)

In [6]:
def conv2d(x, W, stride):
    return tf.nn.conv2d(x, W, strides = [1, stride, stride, 1], padding = "SAME")

In [7]:
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")

In [8]:
def createNetwork():
    # network weights
    W_conv1 = weight_variable([8, 8, 4, 32])
    b_conv1 = bias_variable([32])

    W_conv2 = weight_variable([4, 4, 32, 64])
    b_conv2 = bias_variable([64])

    W_conv3 = weight_variable([3, 3, 64, 64])
    b_conv3 = bias_variable([64])

    W_fc1 = weight_variable([1600, 512])
    b_fc1 = bias_variable([512])

    W_fc2 = weight_variable([512, ACTIONS])
    b_fc2 = bias_variable([ACTIONS])

    # input layer
    s = tf.placeholder("float", [None, 80, 80, 4])

    # hidden layers
    h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, 2) + b_conv2)
    #h_pool2 = max_pool_2x2(h_conv2)

    h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1) + b_conv3)
    #h_pool3 = max_pool_2x2(h_conv3)

    #h_pool3_flat = tf.reshape(h_pool3, [-1, 256])
    h_conv3_flat = tf.reshape(h_conv3, [-1, 1600])

    h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1)

    # readout layer
    readout = tf.matmul(h_fc1, W_fc2) + b_fc2

    return s, readout, h_fc1

In [9]:
def trainNetwork(s, readout, h_fc1, sess):
    # define the cost function
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # open up a game state to communicate with emulator
    game_state = game.GameState()

    # store the previous observations in replay memory
    D = deque()

    # printing
    a_file = open("logs_" + GAME + "/readout.txt", 'w')
    h_file = open("logs_" + GAME + "/hidden.txt", 'w')

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, terminal = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t,1,255,cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    # start training
    epsilon = INITIAL_EPSILON
    t = 0
    
    while "flappy bird" != "angry bird":
        # choose an action epsilon greedily
        readout_t = readout.eval(feed_dict={s : [s_t]})[0]
        a_t = np.zeros([ACTIONS])
        action_index = 0
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print("----------Random Action----------")
                
                if random.random() < 0.1 :
                    a_t[1] = 1
                    action_index = 1
                else:
                    a_t[0] = 1
                    action_index = 0
            else:
                action_index = np.argmax(readout_t)
                a_t[action_index] = 1
        else:
            a_t[0] = 1 # do nothing

        # scale down epsilon
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe next state and reward
        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)
        x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)
        ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
        x_t1 = np.reshape(x_t1, (80, 80, 1))
        #s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)
        s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)

        # store the transition in D
        D.append((s_t, a_t, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing
        if t > OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            # get the batch variables
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            r_batch = [d[2] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            y_batch = []
            readout_j1_batch = readout.eval(feed_dict = {s : s_j1_batch})
            for i in range(0, len(minibatch)):
                terminal = minibatch[i][4]
                # if terminal, only equals reward
                if terminal:
                    y_batch.append(r_batch[i])
                else:
                    y_batch.append(r_batch[i] + GAMMA * np.max(readout_j1_batch[i]))

            # perform gradient step
            train_step.run(feed_dict = {
                y : y_batch,
                a : a_batch,
                s : s_j_batch}
            )

        # update the old values
        s_t = s_t1
        t += 1

        # save progress every 10000 iterations
        if t % 10000 == 0:
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step = t)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, \
            "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
            "/ Q_MAX %e" % np.max(readout_t))
        # write info to files
        '''
        if t % 10000 <= 100:
            a_file.write(",".join([str(x) for x in readout_t]) + '\n')
            h_file.write(",".join([str(x) for x in h_fc1.eval(feed_dict={s:[s_t]})[0]]) + '\n')
            cv2.imwrite("logs_tetris/frame" + str(t) + ".png", x_t1)
        '''

In [None]:
def playGame():
    sess = tf.InteractiveSession()
    s, readout, h_fc1 = createNetwork()
    trainNetwork(s, readout, h_fc1, sess)

def main():
    playGame()

if __name__ == "__main__":
    main()

Instructions for updating:
Use `tf.global_variables_initializer` instead.
INFO:tensorflow:Restoring parameters from saved_networks/bird-dqn-590000
Successfully loaded: saved_networks/bird-dqn-590000
----------Random Action----------
TIMESTEP 1 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193735e+01
TIMESTEP 2 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193025e+01
TIMESTEP 3 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186570e+01
----------Random Action----------
TIMESTEP 4 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185735e+01
TIMESTEP 5 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191248e+01
----------Random Action----------
TIMESTEP 6 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193458e+01
TIMESTEP 7 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191990e+01
TIMESTEP 8 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196165e+01
----------R

TIMESTEP 80 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205774e+01
----------Random Action----------
TIMESTEP 81 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212719e+01
TIMESTEP 82 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207589e+01
TIMESTEP 83 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.213430e+01
TIMESTEP 84 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.201469e+01
----------Random Action----------
TIMESTEP 85 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196468e+01
----------Random Action----------
TIMESTEP 86 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187914e+01
TIMESTEP 87 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194107e+01
TIMESTEP 88 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210937e+01
----------Random Action----------
TIMESTEP 89 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.22507

TIMESTEP 159 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.158769e+01
----------Random Action----------
TIMESTEP 160 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.155257e+01
----------Random Action----------
TIMESTEP 161 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.147033e+01
----------Random Action----------
TIMESTEP 162 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.137382e+01
----------Random Action----------
TIMESTEP 163 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.141316e+01
----------Random Action----------
TIMESTEP 164 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.150002e+01
----------Random Action----------
TIMESTEP 165 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.142439e+01
TIMESTEP 166 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.134711e+01
TIMESTEP 167 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.150133e+01
----

TIMESTEP 239 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187050e+01
----------Random Action----------
TIMESTEP 240 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188617e+01
TIMESTEP 241 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190925e+01
----------Random Action----------
TIMESTEP 242 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194665e+01
----------Random Action----------
TIMESTEP 243 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194205e+01
TIMESTEP 244 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196960e+01
----------Random Action----------
TIMESTEP 245 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195107e+01
TIMESTEP 246 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203149e+01
TIMESTEP 247 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203465e+01
----------Random Action----------
TIMESTEP 248 / STATE observe / EPSILON

TIMESTEP 318 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.266641e+01
TIMESTEP 319 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.263886e+01
----------Random Action----------
TIMESTEP 320 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.261755e+01
----------Random Action----------
TIMESTEP 321 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272027e+01
----------Random Action----------
TIMESTEP 322 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269266e+01
----------Random Action----------
TIMESTEP 323 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.275849e+01
TIMESTEP 324 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.279023e+01
TIMESTEP 325 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.289417e+01
----------Random Action----------
TIMESTEP 326 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.299945e+01
----------Random Action----------
TIME

TIMESTEP 397 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.191990e+01
TIMESTEP 398 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196826e+01
TIMESTEP 399 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.194172e+01
----------Random Action----------
TIMESTEP 400 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212528e+01
----------Random Action----------
TIMESTEP 401 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210783e+01
TIMESTEP 402 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.213789e+01
TIMESTEP 403 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215049e+01
TIMESTEP 404 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218141e+01
----------Random Action----------
TIMESTEP 405 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209971e+01
----------Random Action----------
TIMESTEP 406 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_M

TIMESTEP 477 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.229909e+01
TIMESTEP 478 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.229144e+01
----------Random Action----------
TIMESTEP 479 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217258e+01
TIMESTEP 480 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.229131e+01
TIMESTEP 481 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212325e+01
TIMESTEP 482 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226433e+01
TIMESTEP 483 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212497e+01
----------Random Action----------
TIMESTEP 484 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.143352e+01
TIMESTEP 485 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.116204e+01
----------Random Action----------
TIMESTEP 486 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.265311e+01
----------Random A

----------Random Action----------
TIMESTEP 557 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.215047e+01
TIMESTEP 558 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209779e+01
TIMESTEP 559 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 8.072328e+00
----------Random Action----------
TIMESTEP 560 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 2.103189e+00
TIMESTEP 561 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 2.614392e+00
TIMESTEP 562 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 6.681122e+00
----------Random Action----------
TIMESTEP 563 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.651950e+00
TIMESTEP 564 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -4.320824e-01
----------Random Action----------
TIMESTEP 565 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 7.695042e+00
TIMESTEP 566 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_

TIMESTEP 642 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.156108e+01
----------Random Action----------
TIMESTEP 643 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163680e+01
----------Random Action----------
TIMESTEP 644 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.163766e+01
----------Random Action----------
TIMESTEP 645 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.164388e+01
----------Random Action----------
TIMESTEP 646 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.170956e+01
----------Random Action----------
TIMESTEP 647 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174213e+01
----------Random Action----------
TIMESTEP 648 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.176208e+01
TIMESTEP 649 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184638e+01
TIMESTEP 650 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195925e+01
TIME

TIMESTEP 723 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222843e+01
----------Random Action----------
TIMESTEP 724 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224571e+01
TIMESTEP 725 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224323e+01
TIMESTEP 726 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222850e+01
----------Random Action----------
TIMESTEP 727 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.228925e+01
----------Random Action----------
TIMESTEP 728 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.231758e+01
----------Random Action----------
TIMESTEP 729 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236042e+01
----------Random Action----------
TIMESTEP 730 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218211e+01
----------Random Action----------
TIMESTEP 731 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.182669e+01
----

TIMESTEP 803 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194498e+01
TIMESTEP 804 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199030e+01
----------Random Action----------
TIMESTEP 805 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202384e+01
TIMESTEP 806 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204463e+01
TIMESTEP 807 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200085e+01
----------Random Action----------
TIMESTEP 808 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195364e+01
----------Random Action----------
TIMESTEP 809 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207582e+01
TIMESTEP 810 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.216751e+01
TIMESTEP 811 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.216057e+01
TIMESTEP 812 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207050e+01
TIMESTEP 813 / STA

----------Random Action----------
TIMESTEP 890 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.178001e+01
----------Random Action----------
TIMESTEP 891 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179196e+01
----------Random Action----------
TIMESTEP 892 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184541e+01
----------Random Action----------
TIMESTEP 893 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187654e+01
TIMESTEP 894 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184657e+01
TIMESTEP 895 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.191931e+01
TIMESTEP 896 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192895e+01
TIMESTEP 897 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.175099e+01
TIMESTEP 898 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.181183e+01
----------Random Action----------
TIMESTEP 899 / STATE observe / EPSILON

TIMESTEP 969 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203788e+01
----------Random Action----------
TIMESTEP 970 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200703e+01
----------Random Action----------
TIMESTEP 971 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196335e+01
TIMESTEP 972 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188190e+01
TIMESTEP 973 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198409e+01
----------Random Action----------
TIMESTEP 974 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.162755e+01
TIMESTEP 975 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198987e+01
TIMESTEP 976 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183150e+01
----------Random Action----------
TIMESTEP 977 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209876e+01
----------Random Action----------
TIMESTEP 978 / STATE observe / EPSILON

----------Random Action----------
TIMESTEP 1049 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.161162e+01
----------Random Action----------
TIMESTEP 1050 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.157642e+01
TIMESTEP 1051 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.164120e+01
----------Random Action----------
TIMESTEP 1052 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.170957e+01
TIMESTEP 1053 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.162929e+01
----------Random Action----------
TIMESTEP 1054 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161349e+01
TIMESTEP 1055 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169252e+01
----------Random Action----------
TIMESTEP 1056 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163764e+01
TIMESTEP 1057 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161639e+01
TIMESTEP 1058 / STATE observe

----------Random Action----------
TIMESTEP 1133 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.153212e+01
TIMESTEP 1134 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 6.773630e+00
----------Random Action----------
TIMESTEP 1135 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 7.044024e+00
TIMESTEP 1136 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 6.090459e+00
TIMESTEP 1137 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 4.252225e+00
TIMESTEP 1138 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 3.689699e+00
TIMESTEP 1139 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.992475e+00
----------Random Action----------
TIMESTEP 1140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.515707e+00
TIMESTEP 1141 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 4.252953e+00
----------Random Action----------
TIMESTEP 1142 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 1212 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.172304e+01
TIMESTEP 1213 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174773e+01
----------Random Action----------
TIMESTEP 1214 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.170706e+01
TIMESTEP 1215 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.178780e+01
----------Random Action----------
TIMESTEP 1216 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188468e+01
TIMESTEP 1217 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190936e+01
TIMESTEP 1218 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187129e+01
TIMESTEP 1219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187564e+01
----------Random Action----------
TIMESTEP 1220 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199335e+01
TIMESTEP 1221 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202831e+01
TIMESTEP

TIMESTEP 1292 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163677e+01
TIMESTEP 1293 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163185e+01
TIMESTEP 1294 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.161168e+01
TIMESTEP 1295 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.155248e+01
TIMESTEP 1296 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.127013e+01
----------Random Action----------
TIMESTEP 1297 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.135823e+01
----------Random Action----------
TIMESTEP 1298 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.159220e+01
----------Random Action----------
TIMESTEP 1299 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.165612e+01
----------Random Action----------
TIMESTEP 1300 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 4.621017e+00
TIMESTEP 1301 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD

----------Random Action----------
TIMESTEP 1373 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210918e+01
TIMESTEP 1374 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223555e+01
TIMESTEP 1375 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208113e+01
----------Random Action----------
TIMESTEP 1376 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217842e+01
TIMESTEP 1377 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.219969e+01
----------Random Action----------
TIMESTEP 1378 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231459e+01
----------Random Action----------
TIMESTEP 1379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227848e+01
----------Random Action----------
TIMESTEP 1380 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231982e+01
TIMESTEP 1381 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226182e+01
----------Random Action------

TIMESTEP 1452 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187098e+01
TIMESTEP 1453 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194071e+01
TIMESTEP 1454 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198473e+01
----------Random Action----------
TIMESTEP 1455 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205281e+01
TIMESTEP 1456 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.204282e+01
TIMESTEP 1457 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.199056e+01
----------Random Action----------
TIMESTEP 1458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206538e+01
----------Random Action----------
TIMESTEP 1459 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222915e+01
----------Random Action----------
TIMESTEP 1460 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225239e+01
TIMESTEP 1461 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD

TIMESTEP 1530 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194191e+01
TIMESTEP 1531 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194043e+01
----------Random Action----------
TIMESTEP 1532 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186579e+01
----------Random Action----------
TIMESTEP 1533 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186705e+01
TIMESTEP 1534 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191028e+01
TIMESTEP 1535 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197492e+01
TIMESTEP 1536 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202859e+01
----------Random Action----------
TIMESTEP 1537 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212350e+01
TIMESTEP 1538 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216811e+01
TIMESTEP 1539 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216698e+01
--------

TIMESTEP 1611 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.250168e+01
----------Random Action----------
TIMESTEP 1612 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.248144e+01
TIMESTEP 1613 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.254299e+01
TIMESTEP 1614 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.265994e+01
----------Random Action----------
TIMESTEP 1615 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264030e+01
TIMESTEP 1616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257705e+01
TIMESTEP 1617 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.267235e+01
----------Random Action----------
TIMESTEP 1618 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274606e+01
----------Random Action----------
TIMESTEP 1619 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.280089e+01
TIMESTEP 1620 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 1690 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195931e+01
TIMESTEP 1691 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202010e+01
----------Random Action----------
TIMESTEP 1692 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207574e+01
TIMESTEP 1693 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216230e+01
----------Random Action----------
TIMESTEP 1694 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223126e+01
----------Random Action----------
TIMESTEP 1695 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233907e+01
TIMESTEP 1696 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241006e+01
----------Random Action----------
TIMESTEP 1697 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244121e+01
----------Random Action----------
TIMESTEP 1698 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239191e+01
----------Random Action------

TIMESTEP 1772 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215225e+01
----------Random Action----------
TIMESTEP 1773 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222941e+01
TIMESTEP 1774 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210866e+01
----------Random Action----------
TIMESTEP 1775 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219476e+01
TIMESTEP 1776 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229759e+01
----------Random Action----------
TIMESTEP 1777 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.241734e+01
----------Random Action----------
TIMESTEP 1778 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 9.128698e+00
TIMESTEP 1779 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -4.918938e-01
TIMESTEP 1780 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -2.581867e+00
TIMESTEP 1781 / STATE observe / EPSILON 0.5 / ACTION 1 / REWA

TIMESTEP 1850 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.302133e+01
----------Random Action----------
TIMESTEP 1851 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 1 / Q_MAX 1.304238e+01
----------Random Action----------
TIMESTEP 1852 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.146186e+01
TIMESTEP 1853 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.269297e-02
TIMESTEP 1854 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX -4.820081e+00
TIMESTEP 1855 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.153526e+01
TIMESTEP 1856 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.164544e+01
TIMESTEP 1857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187077e+01
----------Random Action----------
TIMESTEP 1858 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183904e+01
----------Random Action----------
TIMESTEP 1859 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0

TIMESTEP 1929 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238778e+01
----------Random Action----------
TIMESTEP 1930 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.242989e+01
TIMESTEP 1931 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243630e+01
TIMESTEP 1932 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234268e+01
----------Random Action----------
TIMESTEP 1933 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244936e+01
----------Random Action----------
TIMESTEP 1934 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223225e+01
TIMESTEP 1935 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230035e+01
TIMESTEP 1936 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240068e+01
----------Random Action----------
TIMESTEP 1937 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249015e+01
TIMESTEP 1938 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 2007 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211460e+01
TIMESTEP 2008 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208197e+01
TIMESTEP 2009 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202160e+01
TIMESTEP 2010 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202181e+01
TIMESTEP 2011 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199367e+01
TIMESTEP 2012 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206663e+01
TIMESTEP 2013 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219235e+01
----------Random Action----------
TIMESTEP 2014 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224243e+01
TIMESTEP 2015 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225548e+01
TIMESTEP 2016 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225733e+01
TIMESTEP 2017 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 

TIMESTEP 2088 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207313e+01
TIMESTEP 2089 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213547e+01
----------Random Action----------
TIMESTEP 2090 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234826e+01
----------Random Action----------
TIMESTEP 2091 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233974e+01
----------Random Action----------
TIMESTEP 2092 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231358e+01
TIMESTEP 2093 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235571e+01
TIMESTEP 2094 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236600e+01
----------Random Action----------
TIMESTEP 2095 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232757e+01
----------Random Action----------
TIMESTEP 2096 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223001e+01
TIMESTEP 2097 / STATE observe

----------Random Action----------
TIMESTEP 2167 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.294972e+01
TIMESTEP 2168 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.296560e+01
TIMESTEP 2169 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.291846e+01
----------Random Action----------
TIMESTEP 2170 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.286050e+01
TIMESTEP 2171 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.292229e+01
TIMESTEP 2172 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.291597e+01
----------Random Action----------
TIMESTEP 2173 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.293773e+01
----------Random Action----------
TIMESTEP 2174 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213089e+01
----------Random Action----------
TIMESTEP 2175 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198074e+01
TIMESTEP 2176 / STATE observe /

TIMESTEP 2246 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212655e+01
----------Random Action----------
TIMESTEP 2247 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206990e+01
TIMESTEP 2248 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201858e+01
TIMESTEP 2249 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204237e+01
----------Random Action----------
TIMESTEP 2250 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204042e+01
TIMESTEP 2251 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205122e+01
TIMESTEP 2252 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.186668e+01
TIMESTEP 2253 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193556e+01
TIMESTEP 2254 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196547e+01
TIMESTEP 2255 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188211e+01
TIMESTEP 2256 / STATE observe / EPSILON 0.

TIMESTEP 2327 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213262e+01
TIMESTEP 2328 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201929e+01
TIMESTEP 2329 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209119e+01
----------Random Action----------
TIMESTEP 2330 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215464e+01
----------Random Action----------
TIMESTEP 2331 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223870e+01
TIMESTEP 2332 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222786e+01
----------Random Action----------
TIMESTEP 2333 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216100e+01
----------Random Action----------
TIMESTEP 2334 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220744e+01
----------Random Action----------
TIMESTEP 2335 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218873e+01
----------Random Action------

TIMESTEP 2407 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223616e+01
TIMESTEP 2408 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.237641e+01
----------Random Action----------
TIMESTEP 2409 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235988e+01
TIMESTEP 2410 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240702e+01
----------Random Action----------
TIMESTEP 2411 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240644e+01
----------Random Action----------
TIMESTEP 2412 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237372e+01
TIMESTEP 2413 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.244919e+01
TIMESTEP 2414 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.261905e+01
TIMESTEP 2415 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.269181e+01
TIMESTEP 2416 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.265057e+01
TIMESTEP

TIMESTEP 2488 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243515e+01
TIMESTEP 2489 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245003e+01
----------Random Action----------
TIMESTEP 2490 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247241e+01
----------Random Action----------
TIMESTEP 2491 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237178e+01
----------Random Action----------
TIMESTEP 2492 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245386e+01
----------Random Action----------
TIMESTEP 2493 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270309e+01
----------Random Action----------
TIMESTEP 2494 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265937e+01
TIMESTEP 2495 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264167e+01
TIMESTEP 2496 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.279267e+01
----------Random Action------

TIMESTEP 2570 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.176655e+01
----------Random Action----------
TIMESTEP 2571 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182847e+01
TIMESTEP 2572 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.178206e+01
----------Random Action----------
TIMESTEP 2573 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182356e+01
----------Random Action----------
TIMESTEP 2574 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.178909e+01
TIMESTEP 2575 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.177419e+01
TIMESTEP 2576 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.168430e+01
TIMESTEP 2577 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.166921e+01
----------Random Action----------
TIMESTEP 2578 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174069e+01
----------Random Action----------
TIMESTEP 2579 / STATE observe

TIMESTEP 2649 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202215e+01
----------Random Action----------
TIMESTEP 2650 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199402e+01
----------Random Action----------
TIMESTEP 2651 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208878e+01
----------Random Action----------
TIMESTEP 2652 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214565e+01
----------Random Action----------
TIMESTEP 2653 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203509e+01
----------Random Action----------
TIMESTEP 2654 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214768e+01
----------Random Action----------
TIMESTEP 2655 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231182e+01
TIMESTEP 2656 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237313e+01
TIMESTEP 2657 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232871

TIMESTEP 2728 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272926e+01
TIMESTEP 2729 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270812e+01
TIMESTEP 2730 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.278595e+01
TIMESTEP 2731 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.281096e+01
TIMESTEP 2732 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.277402e+01
TIMESTEP 2733 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195854e+01
TIMESTEP 2734 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181089e+01
TIMESTEP 2735 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187182e+01
TIMESTEP 2736 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.186125e+01
----------Random Action----------
TIMESTEP 2737 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.180452e+01
----------Random Action----------
TIMESTEP 2738 / STATE observe / EPSILON 0.5 

TIMESTEP 2809 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244776e+01
----------Random Action----------
TIMESTEP 2810 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.242490e+01
TIMESTEP 2811 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.252164e+01
----------Random Action----------
TIMESTEP 2812 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.250772e+01
TIMESTEP 2813 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.246922e+01
----------Random Action----------
TIMESTEP 2814 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264465e+01
TIMESTEP 2815 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245487e+01
----------Random Action----------
TIMESTEP 2816 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245500e+01
TIMESTEP 2817 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244330e+01
----------Random Action----------
TIMESTEP 2818 / STATE observe

TIMESTEP 2889 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.179597e+01
TIMESTEP 2890 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179241e+01
----------Random Action----------
TIMESTEP 2891 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179944e+01
TIMESTEP 2892 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.182625e+01
----------Random Action----------
TIMESTEP 2893 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185385e+01
----------Random Action----------
TIMESTEP 2894 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185324e+01
----------Random Action----------
TIMESTEP 2895 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183969e+01
----------Random Action----------
TIMESTEP 2896 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.178193e+01
TIMESTEP 2897 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.176451e+01
----------Random Action------

TIMESTEP 2969 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190870e+01
----------Random Action----------
TIMESTEP 2970 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201828e+01
----------Random Action----------
TIMESTEP 2971 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209053e+01
----------Random Action----------
TIMESTEP 2972 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207847e+01
TIMESTEP 2973 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212291e+01
TIMESTEP 2974 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212337e+01
TIMESTEP 2975 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206583e+01
----------Random Action----------
TIMESTEP 2976 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216567e+01
TIMESTEP 2977 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224290e+01
----------Random Action----------
TIMESTEP 2978 / STATE observe

TIMESTEP 3048 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184566e+01
----------Random Action----------
TIMESTEP 3049 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189743e+01
TIMESTEP 3050 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196358e+01
TIMESTEP 3051 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201432e+01
TIMESTEP 3052 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197810e+01
TIMESTEP 3053 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193202e+01
TIMESTEP 3054 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195371e+01
TIMESTEP 3055 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197756e+01
TIMESTEP 3056 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.210131e+01
TIMESTEP 3057 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.205766e+01
----------Random Action----------
TIMESTEP 3058 / STATE observe / EPSILON 0.

TIMESTEP 3129 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.248271e+01
TIMESTEP 3130 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.175537e+01
----------Random Action----------
TIMESTEP 3131 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 2.461645e+00
TIMESTEP 3132 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX 2.863789e+00
----------Random Action----------
TIMESTEP 3133 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183710e+01
TIMESTEP 3134 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.159644e+01
TIMESTEP 3135 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.173877e+01
----------Random Action----------
TIMESTEP 3136 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.178759e+01
TIMESTEP 3137 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181023e+01
TIMESTEP 3138 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182799e+01
TIMESTEP 

----------Random Action----------
TIMESTEP 3211 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163176e+01
TIMESTEP 3212 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.162097e+01
----------Random Action----------
TIMESTEP 3213 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.159029e+01
TIMESTEP 3214 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.167392e+01
TIMESTEP 3215 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.164533e+01
----------Random Action----------
TIMESTEP 3216 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.154195e+01
----------Random Action----------
TIMESTEP 3217 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.152276e+01
----------Random Action----------
TIMESTEP 3218 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.158591e+01
TIMESTEP 3219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.151566e+01
----------Random Action------

TIMESTEP 3290 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.289453e+01
TIMESTEP 3291 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.295907e+01
TIMESTEP 3292 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285910e+01
TIMESTEP 3293 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.283846e+01
TIMESTEP 3294 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.291408e+01
----------Random Action----------
TIMESTEP 3295 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.293919e+01
TIMESTEP 3296 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.296686e+01
----------Random Action----------
TIMESTEP 3297 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208192e+01
----------Random Action----------
TIMESTEP 3298 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192191e+01
----------Random Action----------
TIMESTEP 3299 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0

----------Random Action----------
TIMESTEP 3371 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263421e+01
TIMESTEP 3372 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271664e+01
TIMESTEP 3373 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272411e+01
----------Random Action----------
TIMESTEP 3374 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.282479e+01
----------Random Action----------
TIMESTEP 3375 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.277848e+01
TIMESTEP 3376 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.280343e+01
----------Random Action----------
TIMESTEP 3377 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.295819e+01
----------Random Action----------
TIMESTEP 3378 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -7.317328e+00
----------Random Action----------
TIMESTEP 3379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179982

----------Random Action----------
TIMESTEP 3451 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182979e+01
TIMESTEP 3452 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179549e+01
TIMESTEP 3453 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.184261e+01
----------Random Action----------
TIMESTEP 3454 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181934e+01
TIMESTEP 3455 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174958e+01
----------Random Action----------
TIMESTEP 3456 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174358e+01
TIMESTEP 3457 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.171402e+01
TIMESTEP 3458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.171690e+01
TIMESTEP 3459 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.171232e+01
----------Random Action----------
TIMESTEP 3460 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

----------Random Action----------
TIMESTEP 3529 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.178855e+01
----------Random Action----------
TIMESTEP 3530 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.162835e+01
----------Random Action----------
TIMESTEP 3531 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.164920e+01
TIMESTEP 3532 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.174721e+01
TIMESTEP 3533 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.174843e+01
----------Random Action----------
TIMESTEP 3534 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.173965e+01
----------Random Action----------
TIMESTEP 3535 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.173573e+01
----------Random Action----------
TIMESTEP 3536 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.168131e+01
TIMESTEP 3537 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.175045

TIMESTEP 3609 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233204e+01
----------Random Action----------
TIMESTEP 3610 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235539e+01
TIMESTEP 3611 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.246809e+01
----------Random Action----------
TIMESTEP 3612 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253730e+01
----------Random Action----------
TIMESTEP 3613 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.253814e+01
----------Random Action----------
TIMESTEP 3614 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245647e+01
----------Random Action----------
TIMESTEP 3615 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239321e+01
----------Random Action----------
TIMESTEP 3616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237819e+01
----------Random Action----------
TIMESTEP 3617 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 3689 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.204041e+01
----------Random Action----------
TIMESTEP 3690 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205493e+01
TIMESTEP 3691 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.188150e+01
TIMESTEP 3692 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.215355e+01
TIMESTEP 3693 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224962e+01
TIMESTEP 3694 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.221143e+01
----------Random Action----------
TIMESTEP 3695 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218927e+01
----------Random Action----------
TIMESTEP 3696 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220951e+01
TIMESTEP 3697 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.227707e+01
TIMESTEP 3698 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.230655e+01
--------

----------Random Action----------
TIMESTEP 3768 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190991e+01
TIMESTEP 3769 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199998e+01
----------Random Action----------
TIMESTEP 3770 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202516e+01
----------Random Action----------
TIMESTEP 3771 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.205376e+01
TIMESTEP 3772 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198889e+01
----------Random Action----------
TIMESTEP 3773 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199075e+01
TIMESTEP 3774 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205633e+01
TIMESTEP 3775 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202153e+01
----------Random Action----------
TIMESTEP 3776 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194576e+01
TIMESTEP 3777 / STATE observe

----------Random Action----------
TIMESTEP 3850 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.284605e+01
TIMESTEP 3851 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.295612e+01
TIMESTEP 3852 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.305353e+01
----------Random Action----------
TIMESTEP 3853 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.307045e+01
----------Random Action----------
TIMESTEP 3854 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221251e+01
----------Random Action----------
TIMESTEP 3855 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.202544e+01
----------Random Action----------
TIMESTEP 3856 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199776e+01
----------Random Action----------
TIMESTEP 3857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -6.107518e-01
TIMESTEP 3858 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.146627e+

TIMESTEP 3929 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218157e+01
TIMESTEP 3930 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226240e+01
TIMESTEP 3931 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230046e+01
----------Random Action----------
TIMESTEP 3932 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230544e+01
TIMESTEP 3933 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214294e+01
----------Random Action----------
TIMESTEP 3934 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.175496e+01
TIMESTEP 3935 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222107e+01
----------Random Action----------
TIMESTEP 3936 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.236366e+01
TIMESTEP 3937 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -3.938987e+00
----------Random Action----------
TIMESTEP 3938 / STATE observe / EPSILON 0.5 / ACTION 0 / REWAR

TIMESTEP 4010 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228001e+01
TIMESTEP 4011 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236589e+01
TIMESTEP 4012 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230603e+01
TIMESTEP 4013 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220211e+01
TIMESTEP 4014 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223622e+01
TIMESTEP 4015 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229423e+01
TIMESTEP 4016 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243108e+01
TIMESTEP 4017 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245709e+01
----------Random Action----------
TIMESTEP 4018 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.252528e+01
TIMESTEP 4019 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240141e+01
----------Random Action----------
TIMESTEP 4020 / STATE observe / EPSILON 0.

TIMESTEP 4097 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197114e+01
TIMESTEP 4098 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203158e+01
----------Random Action----------
TIMESTEP 4099 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196785e+01
----------Random Action----------
TIMESTEP 4100 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192297e+01
----------Random Action----------
TIMESTEP 4101 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191222e+01
----------Random Action----------
TIMESTEP 4102 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.189777e+01
TIMESTEP 4103 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.181095e+01
TIMESTEP 4104 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196144e+01
TIMESTEP 4105 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.190378e+01
TIMESTEP 4106 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 4176 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.158989e+01
----------Random Action----------
TIMESTEP 4177 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.160059e+01
TIMESTEP 4178 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.152259e+01
----------Random Action----------
TIMESTEP 4179 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.150501e+01
TIMESTEP 4180 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.152156e+01
----------Random Action----------
TIMESTEP 4181 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.144279e+01
TIMESTEP 4182 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.147720e+01
----------Random Action----------
TIMESTEP 4183 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.151242e+01
----------Random Action----------
TIMESTEP 4184 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.164702e+01
TIMESTEP 4185 / STATE observe

TIMESTEP 4255 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197268e+01
TIMESTEP 4256 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196626e+01
TIMESTEP 4257 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.198313e+01
TIMESTEP 4258 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.198243e+01
TIMESTEP 4259 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.201479e+01
----------Random Action----------
TIMESTEP 4260 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202424e+01
TIMESTEP 4261 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208087e+01
----------Random Action----------
TIMESTEP 4262 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209227e+01
----------Random Action----------
TIMESTEP 4263 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.209243e+01
TIMESTEP 4264 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211220e+01
--------

TIMESTEP 4335 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207656e+01
----------Random Action----------
TIMESTEP 4336 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202465e+01
TIMESTEP 4337 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205801e+01
TIMESTEP 4338 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205294e+01
TIMESTEP 4339 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205096e+01
TIMESTEP 4340 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.186973e+01
TIMESTEP 4341 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194417e+01
----------Random Action----------
TIMESTEP 4342 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194540e+01
----------Random Action----------
TIMESTEP 4343 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185863e+01
TIMESTEP 4344 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.193767e+01
TIMESTEP

TIMESTEP 4416 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.141544e+01
TIMESTEP 4417 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.111384e+01
----------Random Action----------
TIMESTEP 4418 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.053044e+01
TIMESTEP 4419 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.032815e+01
----------Random Action----------
TIMESTEP 4420 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 9.328930e+00
----------Random Action----------
TIMESTEP 4421 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.066961e+01
----------Random Action----------
TIMESTEP 4422 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.135351e+01
----------Random Action----------
TIMESTEP 4423 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.092885e+01
----------Random Action----------
TIMESTEP 4424 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.055482

TIMESTEP 4496 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.305845e+01
----------Random Action----------
TIMESTEP 4497 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.314860e+01
TIMESTEP 4498 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.313233e+01
TIMESTEP 4499 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.310427e+01
TIMESTEP 4500 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231326e+01
TIMESTEP 4501 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213488e+01
TIMESTEP 4502 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221842e+01
TIMESTEP 4503 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227755e+01
----------Random Action----------
TIMESTEP 4504 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222723e+01
----------Random Action----------
TIMESTEP 4505 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227162e+01
----------

----------Random Action----------
TIMESTEP 4578 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213715e+01
----------Random Action----------
TIMESTEP 4579 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.212133e+01
----------Random Action----------
TIMESTEP 4580 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214640e+01
TIMESTEP 4581 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214461e+01
TIMESTEP 4582 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222243e+01
TIMESTEP 4583 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215150e+01
----------Random Action----------
TIMESTEP 4584 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216191e+01
TIMESTEP 4585 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212061e+01
----------Random Action----------
TIMESTEP 4586 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.215084e+01
----------Random Action------

TIMESTEP 4658 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.300865e+01
TIMESTEP 4659 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.304480e+01
TIMESTEP 4660 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.231286e+01
----------Random Action----------
TIMESTEP 4661 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203721e+01
----------Random Action----------
TIMESTEP 4662 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209264e+01
----------Random Action----------
TIMESTEP 4663 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212645e+01
----------Random Action----------
TIMESTEP 4664 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207095e+01
----------Random Action----------
TIMESTEP 4665 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211650e+01
----------Random Action----------
TIMESTEP 4666 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222404e+

TIMESTEP 4739 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.198253e+01
----------Random Action----------
TIMESTEP 4740 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214086e+01
----------Random Action----------
TIMESTEP 4741 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234089e+01
TIMESTEP 4742 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240397e+01
TIMESTEP 4743 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.217525e+01
TIMESTEP 4744 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.216882e+01
----------Random Action----------
TIMESTEP 4745 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220581e+01
----------Random Action----------
TIMESTEP 4746 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219925e+01
----------Random Action----------
TIMESTEP 4747 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233861e+01
----------Random Action------

----------Random Action----------
TIMESTEP 4820 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.262477e+01
TIMESTEP 4821 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.258947e+01
----------Random Action----------
TIMESTEP 4822 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251319e+01
----------Random Action----------
TIMESTEP 4823 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263714e+01
----------Random Action----------
TIMESTEP 4824 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.267989e+01
----------Random Action----------
TIMESTEP 4825 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.277391e+01
----------Random Action----------
TIMESTEP 4826 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274554e+01
TIMESTEP 4827 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285387e+01
----------Random Action----------
TIMESTEP 4828 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 4901 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233262e+01
TIMESTEP 4902 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236784e+01
----------Random Action----------
TIMESTEP 4903 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240871e+01
----------Random Action----------
TIMESTEP 4904 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237125e+01
----------Random Action----------
TIMESTEP 4905 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239854e+01
----------Random Action----------
TIMESTEP 4906 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240291e+01
TIMESTEP 4907 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253039e+01
----------Random Action----------
TIMESTEP 4908 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.254710e+01
TIMESTEP 4909 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.250725e+01
TIMESTEP 4910 / STATE observe

TIMESTEP 4980 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.185516e+01
----------Random Action----------
TIMESTEP 4981 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174221e+01
TIMESTEP 4982 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.158840e+01
TIMESTEP 4983 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.164948e+01
TIMESTEP 4984 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.162953e+01
----------Random Action----------
TIMESTEP 4985 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.169455e+01
----------Random Action----------
TIMESTEP 4986 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169007e+01
----------Random Action----------
TIMESTEP 4987 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.168151e+01
----------Random Action----------
TIMESTEP 4988 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163923e+01
----------Random Action------

TIMESTEP 5061 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.252488e+01
----------Random Action----------
TIMESTEP 5062 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240662e+01
TIMESTEP 5063 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.241056e+01
TIMESTEP 5064 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231412e+01
TIMESTEP 5065 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230756e+01
TIMESTEP 5066 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227367e+01
----------Random Action----------
TIMESTEP 5067 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226723e+01
----------Random Action----------
TIMESTEP 5068 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238731e+01
TIMESTEP 5069 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235779e+01
----------Random Action----------
TIMESTEP 5070 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 5139 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182325e+01
TIMESTEP 5140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187121e+01
TIMESTEP 5141 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182519e+01
TIMESTEP 5142 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187799e+01
TIMESTEP 5143 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187455e+01
TIMESTEP 5144 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202447e+01
TIMESTEP 5145 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213856e+01
----------Random Action----------
TIMESTEP 5146 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219157e+01
----------Random Action----------
TIMESTEP 5147 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215587e+01
----------Random Action----------
TIMESTEP 5148 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225873e+01
--------

TIMESTEP 5219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200629e+01
----------Random Action----------
TIMESTEP 5220 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208869e+01
----------Random Action----------
TIMESTEP 5221 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217417e+01
----------Random Action----------
TIMESTEP 5222 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224042e+01
----------Random Action----------
TIMESTEP 5223 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228041e+01
TIMESTEP 5224 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218948e+01
----------Random Action----------
TIMESTEP 5225 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216773e+01
TIMESTEP 5226 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.230279e+01
TIMESTEP 5227 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.236295e+01
TIMESTEP 5228 / STATE observe

----------Random Action----------
TIMESTEP 5299 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217735e+01
TIMESTEP 5300 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216730e+01
TIMESTEP 5301 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210646e+01
TIMESTEP 5302 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.219267e+01
----------Random Action----------
TIMESTEP 5303 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228566e+01
TIMESTEP 5304 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.231266e+01
TIMESTEP 5305 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.225994e+01
TIMESTEP 5306 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.224828e+01
TIMESTEP 5307 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.226209e+01
TIMESTEP 5308 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224133e+01
TIMESTEP 5309 / STATE observe / EPSILON 0.

TIMESTEP 5377 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210353e+01
TIMESTEP 5378 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218149e+01
TIMESTEP 5379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222094e+01
TIMESTEP 5380 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221180e+01
----------Random Action----------
TIMESTEP 5381 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226384e+01
TIMESTEP 5382 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231515e+01
TIMESTEP 5383 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236846e+01
----------Random Action----------
TIMESTEP 5384 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.246214e+01
TIMESTEP 5385 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245266e+01
----------Random Action----------
TIMESTEP 5386 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.256709e+01
--------

----------Random Action----------
TIMESTEP 5457 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.149261e+01
----------Random Action----------
TIMESTEP 5458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.144240e+01
----------Random Action----------
TIMESTEP 5459 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.148128e+01
----------Random Action----------
TIMESTEP 5460 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.151381e+01
----------Random Action----------
TIMESTEP 5461 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.157822e+01
TIMESTEP 5462 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169550e+01
----------Random Action----------
TIMESTEP 5463 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.177782e+01
----------Random Action----------
TIMESTEP 5464 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.176783e+01
TIMESTEP 5465 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 5536 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182851e+01
TIMESTEP 5537 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187150e+01
TIMESTEP 5538 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185789e+01
TIMESTEP 5539 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.180992e+01
----------Random Action----------
TIMESTEP 5540 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189419e+01
TIMESTEP 5541 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198519e+01
----------Random Action----------
TIMESTEP 5542 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198512e+01
TIMESTEP 5543 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.219011e+01
TIMESTEP 5544 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196238e+01
TIMESTEP 5545 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197163e+01
----------Random Action----------
TIMESTEP

TIMESTEP 5615 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.154896e+01
TIMESTEP 5616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.165304e+01
----------Random Action----------
TIMESTEP 5617 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.168834e+01
----------Random Action----------
TIMESTEP 5618 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161594e+01
----------Random Action----------
TIMESTEP 5619 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.153874e+01
TIMESTEP 5620 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.147981e+01
----------Random Action----------
TIMESTEP 5621 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.150318e+01
TIMESTEP 5622 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.158560e+01
----------Random Action----------
TIMESTEP 5623 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.157518e+01
----------Random Action------

----------Random Action----------
TIMESTEP 5694 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204888e+01
----------Random Action----------
TIMESTEP 5695 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209050e+01
----------Random Action----------
TIMESTEP 5696 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224823e+01
----------Random Action----------
TIMESTEP 5697 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233045e+01
----------Random Action----------
TIMESTEP 5698 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.245999e+01
TIMESTEP 5699 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.230953e+01
TIMESTEP 5700 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.228510e+01
----------Random Action----------
TIMESTEP 5701 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229473e+01
TIMESTEP 5702 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236365

----------Random Action----------
TIMESTEP 5774 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184417e+01
TIMESTEP 5775 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179436e+01
TIMESTEP 5776 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183384e+01
TIMESTEP 5777 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195950e+01
----------Random Action----------
TIMESTEP 5778 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196150e+01
----------Random Action----------
TIMESTEP 5779 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199229e+01
----------Random Action----------
TIMESTEP 5780 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197848e+01
TIMESTEP 5781 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194784e+01
TIMESTEP 5782 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194609e+01
TIMESTEP 5783 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

----------Random Action----------
TIMESTEP 5854 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.220346e+01
TIMESTEP 5855 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.225362e+01
TIMESTEP 5856 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.237122e+01
----------Random Action----------
TIMESTEP 5857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235581e+01
----------Random Action----------
TIMESTEP 5858 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240662e+01
TIMESTEP 5859 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.241527e+01
TIMESTEP 5860 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.238027e+01
TIMESTEP 5861 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.249781e+01
----------Random Action----------
TIMESTEP 5862 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.260413e+01
----------Random Action----------
TIMESTEP 5863 / STATE observe

TIMESTEP 5933 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194994e+01
TIMESTEP 5934 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203867e+01
----------Random Action----------
TIMESTEP 5935 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.251624e+01
TIMESTEP 5936 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -5.361472e-01
----------Random Action----------
TIMESTEP 5937 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -3.936416e+00
TIMESTEP 5938 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.155564e+01
TIMESTEP 5939 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163539e+01
----------Random Action----------
TIMESTEP 5940 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.166976e+01
TIMESTEP 5941 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182562e+01
----------Random Action----------
TIMESTEP 5942 / STATE observe / EPSILON 0.5 / ACTION 0 / REWAR

TIMESTEP 6012 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191171e+01
TIMESTEP 6013 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195359e+01
----------Random Action----------
TIMESTEP 6014 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207993e+01
----------Random Action----------
TIMESTEP 6015 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219781e+01
TIMESTEP 6016 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215630e+01
TIMESTEP 6017 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211623e+01
TIMESTEP 6018 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207118e+01
TIMESTEP 6019 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209722e+01
TIMESTEP 6020 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210064e+01
----------Random Action----------
TIMESTEP 6021 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207848e+01
--------

TIMESTEP 6098 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185179e+01
----------Random Action----------
TIMESTEP 6099 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.183748e+01
----------Random Action----------
TIMESTEP 6100 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183876e+01
TIMESTEP 6101 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.187726e+01
----------Random Action----------
TIMESTEP 6102 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187869e+01
----------Random Action----------
TIMESTEP 6103 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.180947e+01
----------Random Action----------
TIMESTEP 6104 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179552e+01
TIMESTEP 6105 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184091e+01
TIMESTEP 6106 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198848e+01
TIMESTEP 6107 / STATE observe

TIMESTEP 6179 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215390e+01
----------Random Action----------
TIMESTEP 6180 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214806e+01
----------Random Action----------
TIMESTEP 6181 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219035e+01
----------Random Action----------
TIMESTEP 6182 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221598e+01
TIMESTEP 6183 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.223645e+01
TIMESTEP 6184 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.219569e+01
TIMESTEP 6185 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.213112e+01
----------Random Action----------
TIMESTEP 6186 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218221e+01
----------Random Action----------
TIMESTEP 6187 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224930e+01
TIMESTEP 6188 / STATE observe

TIMESTEP 6258 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222882e+01
----------Random Action----------
TIMESTEP 6259 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218264e+01
TIMESTEP 6260 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210320e+01
----------Random Action----------
TIMESTEP 6261 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.207089e+01
TIMESTEP 6262 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.321921e+00
TIMESTEP 6263 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 7.184746e-01
----------Random Action----------
TIMESTEP 6264 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -1.865833e+00
TIMESTEP 6265 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.325088e+00
TIMESTEP 6266 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 4.466323e+00
----------Random Action----------
TIMESTEP 6267 / STATE observe / EPSILON 0.5 / ACTION 0 / REWAR

TIMESTEP 6338 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220005e+01
----------Random Action----------
TIMESTEP 6339 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220446e+01
TIMESTEP 6340 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229649e+01
TIMESTEP 6341 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232129e+01
TIMESTEP 6342 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229557e+01
TIMESTEP 6343 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235353e+01
TIMESTEP 6344 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237909e+01
----------Random Action----------
TIMESTEP 6345 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241738e+01
TIMESTEP 6346 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247899e+01
TIMESTEP 6347 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257252e+01
----------Random Action----------
TIMESTEP

TIMESTEP 6418 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265030e+01
----------Random Action----------
TIMESTEP 6419 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.284182e+01
TIMESTEP 6420 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.290537e+01
TIMESTEP 6421 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.291020e+01
TIMESTEP 6422 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207383e+01
----------Random Action----------
TIMESTEP 6423 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186520e+01
----------Random Action----------
TIMESTEP 6424 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191923e+01
TIMESTEP 6425 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195083e+01
----------Random Action----------
TIMESTEP 6426 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195008e+01
TIMESTEP 6427 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0

----------Random Action----------
TIMESTEP 6503 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196060e+01
----------Random Action----------
TIMESTEP 6504 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205470e+01
----------Random Action----------
TIMESTEP 6505 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210188e+01
----------Random Action----------
TIMESTEP 6506 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.194820e+01
TIMESTEP 6507 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.201752e+01
TIMESTEP 6508 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.182358e+01
TIMESTEP 6509 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.176235e+01
TIMESTEP 6510 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.171701e+01
TIMESTEP 6511 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.177332e+01
----------Random Action----------
TIMESTEP 6512 / STATE observe

----------Random Action----------
TIMESTEP 6583 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.592938e-01
----------Random Action----------
TIMESTEP 6584 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -6.033437e-01
----------Random Action----------
TIMESTEP 6585 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196736e+01
----------Random Action----------
TIMESTEP 6586 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184621e+01
TIMESTEP 6587 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163534e+01
----------Random Action----------
TIMESTEP 6588 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.175881e+01
TIMESTEP 6589 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.173638e+01
TIMESTEP 6590 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.177972e+01
----------Random Action----------
TIMESTEP 6591 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.184565

TIMESTEP 6661 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.198666e+01
TIMESTEP 6662 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199133e+01
----------Random Action----------
TIMESTEP 6663 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.190766e+01
----------Random Action----------
TIMESTEP 6664 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190133e+01
----------Random Action----------
TIMESTEP 6665 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183146e+01
----------Random Action----------
TIMESTEP 6666 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193766e+01
TIMESTEP 6667 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188712e+01
----------Random Action----------
TIMESTEP 6668 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186474e+01
----------Random Action----------
TIMESTEP 6669 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.189625

TIMESTEP 6739 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233273e+01
TIMESTEP 6740 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.247498e+01
TIMESTEP 6741 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.248675e+01
----------Random Action----------
TIMESTEP 6742 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239183e+01
TIMESTEP 6743 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.252495e+01
----------Random Action----------
TIMESTEP 6744 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259012e+01
TIMESTEP 6745 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266621e+01
TIMESTEP 6746 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270074e+01
TIMESTEP 6747 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.267285e+01
----------Random Action----------
TIMESTEP 6748 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.281363e+01
TIMESTEP

TIMESTEP 6819 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.279213e+01
----------Random Action----------
TIMESTEP 6820 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.014401e+00
----------Random Action----------
TIMESTEP 6821 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -4.017863e+00
TIMESTEP 6822 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191978e+01
TIMESTEP 6823 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174574e+01
TIMESTEP 6824 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.170114e+01
----------Random Action----------
TIMESTEP 6825 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.184419e+01
----------Random Action----------
TIMESTEP 6826 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.166985e+01
----------Random Action----------
TIMESTEP 6827 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161561e+01
----------Random Action------

----------Random Action----------
TIMESTEP 6898 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205692e+01
----------Random Action----------
TIMESTEP 6899 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208183e+01
----------Random Action----------
TIMESTEP 6900 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202571e+01
TIMESTEP 6901 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209456e+01
----------Random Action----------
TIMESTEP 6902 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208117e+01
TIMESTEP 6903 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212978e+01
----------Random Action----------
TIMESTEP 6904 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217157e+01
----------Random Action----------
TIMESTEP 6905 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226916e+01
TIMESTEP 6906 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.232431

TIMESTEP 6978 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.236281e+01
TIMESTEP 6979 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.238664e+01
TIMESTEP 6980 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239391e+01
----------Random Action----------
TIMESTEP 6981 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251477e+01
TIMESTEP 6982 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241772e+01
----------Random Action----------
TIMESTEP 6983 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.248535e+01
TIMESTEP 6984 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.256358e+01
----------Random Action----------
TIMESTEP 6985 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257715e+01
----------Random Action----------
TIMESTEP 6986 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.258201e+01
----------Random Action----------
TIMESTEP 6987 / STATE observe

TIMESTEP 7057 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.193190e+01
----------Random Action----------
TIMESTEP 7058 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189460e+01
TIMESTEP 7059 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.046410e+01
----------Random Action----------
TIMESTEP 7060 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 6.971680e+00
----------Random Action----------
TIMESTEP 7061 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.032251e+00
----------Random Action----------
TIMESTEP 7062 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.442854e+00
TIMESTEP 7063 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 5.844817e+00
TIMESTEP 7064 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 2.168946e+00
TIMESTEP 7065 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 3.180217e+00
TIMESTEP 7066 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD

TIMESTEP 7137 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181446e+01
TIMESTEP 7138 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191981e+01
----------Random Action----------
TIMESTEP 7139 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197913e+01
TIMESTEP 7140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196875e+01
TIMESTEP 7141 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206258e+01
----------Random Action----------
TIMESTEP 7142 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.211688e+01
TIMESTEP 7143 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207865e+01
TIMESTEP 7144 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197455e+01
TIMESTEP 7145 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192781e+01
----------Random Action----------
TIMESTEP 7146 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193539e+01
TIMESTEP

TIMESTEP 7217 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 2.604394e+00
----------Random Action----------
TIMESTEP 7218 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.166722e+01
----------Random Action----------
TIMESTEP 7219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.152470e+01
----------Random Action----------
TIMESTEP 7220 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.164477e+01
TIMESTEP 7221 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.160448e+01
TIMESTEP 7222 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.152099e+01
----------Random Action----------
TIMESTEP 7223 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.144677e+01
TIMESTEP 7224 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.149823e+01
----------Random Action----------
TIMESTEP 7225 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.158830e+01
----------Random Action-------

TIMESTEP 7296 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.176606e+01
TIMESTEP 7297 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.184726e+01
TIMESTEP 7298 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.181075e+01
----------Random Action----------
TIMESTEP 7299 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.180904e+01
----------Random Action----------
TIMESTEP 7300 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181819e+01
----------Random Action----------
TIMESTEP 7301 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.180305e+01
----------Random Action----------
TIMESTEP 7302 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.173454e+01
----------Random Action----------
TIMESTEP 7303 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174715e+01
----------Random Action----------
TIMESTEP 7304 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188806

TIMESTEP 7377 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240566e+01
----------Random Action----------
TIMESTEP 7378 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253688e+01
----------Random Action----------
TIMESTEP 7379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.256698e+01
TIMESTEP 7380 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259164e+01
----------Random Action----------
TIMESTEP 7381 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.256822e+01
TIMESTEP 7382 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239280e+01
TIMESTEP 7383 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219491e+01
TIMESTEP 7384 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221582e+01
----------Random Action----------
TIMESTEP 7385 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259882e+01
----------Random Action----------
TIMESTEP 7386 / STATE observe

TIMESTEP 7457 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 9.503330e+00
TIMESTEP 7458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.171128e+01
----------Random Action----------
TIMESTEP 7459 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.149537e+01
TIMESTEP 7460 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.165618e+01
TIMESTEP 7461 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.156357e+01
TIMESTEP 7462 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.149817e+01
----------Random Action----------
TIMESTEP 7463 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.143660e+01
TIMESTEP 7464 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.151966e+01
TIMESTEP 7465 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.146901e+01
TIMESTEP 7466 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.146366e+01
TIMESTEP 7467 / STATE observe / EPSILON 0.5

TIMESTEP 7537 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 7.969298e+00
TIMESTEP 7538 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 7.637813e+00
TIMESTEP 7539 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 8.393877e+00
----------Random Action----------
TIMESTEP 7540 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 5.740306e+00
----------Random Action----------
TIMESTEP 7541 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 5.417150e+00
----------Random Action----------
TIMESTEP 7542 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.837466e+00
----------Random Action----------
TIMESTEP 7543 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 2.927653e+00
----------Random Action----------
TIMESTEP 7544 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202138e+01
TIMESTEP 7545 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186033e+01
----------Random Action-------

----------Random Action----------
TIMESTEP 7616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182562e+01
TIMESTEP 7617 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181640e+01
TIMESTEP 7618 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.180280e+01
----------Random Action----------
TIMESTEP 7619 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188387e+01
----------Random Action----------
TIMESTEP 7620 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189460e+01
TIMESTEP 7621 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192183e+01
----------Random Action----------
TIMESTEP 7622 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191564e+01
----------Random Action----------
TIMESTEP 7623 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199160e+01
TIMESTEP 7624 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202566e+01
TIMESTEP 7625 / STATE observe

TIMESTEP 7695 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234548e+01
----------Random Action----------
TIMESTEP 7696 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237161e+01
TIMESTEP 7697 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231567e+01
TIMESTEP 7698 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228883e+01
----------Random Action----------
TIMESTEP 7699 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235636e+01
----------Random Action----------
TIMESTEP 7700 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 1.241432e+01
----------Random Action----------
TIMESTEP 7701 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.170269e+01
----------Random Action----------
TIMESTEP 7702 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169079e+01
TIMESTEP 7703 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.156862e+01
----------Random Action-------

TIMESTEP 7775 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 3.628145e+00
TIMESTEP 7776 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -5.476137e-01
----------Random Action----------
TIMESTEP 7777 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.666948e+00
----------Random Action----------
TIMESTEP 7778 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.209286e+00
----------Random Action----------
TIMESTEP 7779 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 4.181598e+00
TIMESTEP 7780 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 5.127433e+00
----------Random Action----------
TIMESTEP 7781 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 7.483508e+00
----------Random Action----------
TIMESTEP 7782 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 6.685711e+00
----------Random Action----------
TIMESTEP 7783 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.23978

----------Random Action----------
TIMESTEP 7856 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.230526e+01
----------Random Action----------
TIMESTEP 7857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225586e+01
----------Random Action----------
TIMESTEP 7858 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221523e+01
----------Random Action----------
TIMESTEP 7859 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224767e+01
TIMESTEP 7860 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217470e+01
----------Random Action----------
TIMESTEP 7861 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219087e+01
TIMESTEP 7862 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221773e+01
TIMESTEP 7863 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227001e+01
TIMESTEP 7864 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223985e+01
TIMESTEP 7865 / STATE observe

----------Random Action----------
TIMESTEP 7937 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.135458e+01
----------Random Action----------
TIMESTEP 7938 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.138300e+01
----------Random Action----------
TIMESTEP 7939 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.134020e+01
----------Random Action----------
TIMESTEP 7940 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.132269e+01
TIMESTEP 7941 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.136411e+01
----------Random Action----------
TIMESTEP 7942 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.146654e+01
----------Random Action----------
TIMESTEP 7943 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.147130e+01
TIMESTEP 7944 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.150772e+01
TIMESTEP 7945 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.154957

TIMESTEP 8016 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185870e+01
----------Random Action----------
TIMESTEP 8017 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.158700e+01
TIMESTEP 8018 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 8.611979e+00
----------Random Action----------
TIMESTEP 8019 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX 8.814654e+00
----------Random Action----------
TIMESTEP 8020 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.144355e+01
TIMESTEP 8021 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.167302e+01
TIMESTEP 8022 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.167979e+01
TIMESTEP 8023 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.175881e+01
----------Random Action----------
TIMESTEP 8024 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.176421e+01
TIMESTEP 8025 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 

----------Random Action----------
TIMESTEP 8096 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188844e+01
----------Random Action----------
TIMESTEP 8097 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189080e+01
TIMESTEP 8098 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192187e+01
----------Random Action----------
TIMESTEP 8099 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202552e+01
----------Random Action----------
TIMESTEP 8100 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222760e+01
----------Random Action----------
TIMESTEP 8101 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.229830e+01
TIMESTEP 8102 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212064e+01
TIMESTEP 8103 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214593e+01
TIMESTEP 8104 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232147e+01
TIMESTEP 8105 / STATE observe

TIMESTEP 8177 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.255967e+01
----------Random Action----------
TIMESTEP 8178 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259517e+01
----------Random Action----------
TIMESTEP 8179 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.247291e+01
----------Random Action----------
TIMESTEP 8180 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.203408e+01
----------Random Action----------
TIMESTEP 8181 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238214e+01
----------Random Action----------
TIMESTEP 8182 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244807e+01
----------Random Action----------
TIMESTEP 8183 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249401e+01
----------Random Action----------
TIMESTEP 8184 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253113e+01
TIMESTEP 8185 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 8257 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.241177e+01
TIMESTEP 8258 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.236583e+01
----------Random Action----------
TIMESTEP 8259 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235028e+01
----------Random Action----------
TIMESTEP 8260 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.250047e+01
TIMESTEP 8261 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.261762e+01
TIMESTEP 8262 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.258672e+01
TIMESTEP 8263 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.249448e+01
TIMESTEP 8264 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.254847e+01
----------Random Action----------
TIMESTEP 8265 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251887e+01
----------Random Action----------
TIMESTEP 8266 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

----------Random Action----------
TIMESTEP 8338 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169377e+01
TIMESTEP 8339 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169016e+01
TIMESTEP 8340 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.165921e+01
TIMESTEP 8341 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169946e+01
----------Random Action----------
TIMESTEP 8342 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.166123e+01
TIMESTEP 8343 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161180e+01
TIMESTEP 8344 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163066e+01
----------Random Action----------
TIMESTEP 8345 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.166276e+01
TIMESTEP 8346 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.167074e+01
----------Random Action----------
TIMESTEP 8347 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 8418 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.219704e+01
----------Random Action----------
TIMESTEP 8419 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216384e+01
----------Random Action----------
TIMESTEP 8420 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210427e+01
TIMESTEP 8421 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.213960e+01
TIMESTEP 8422 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.197987e+01
TIMESTEP 8423 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.219180e+01
TIMESTEP 8424 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.250011e+01
TIMESTEP 8425 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.249526e+01
----------Random Action----------
TIMESTEP 8426 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.248015e+01
TIMESTEP 8427 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.246714e+01
--------

TIMESTEP 8503 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241358e+01
----------Random Action----------
TIMESTEP 8504 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241393e+01
----------Random Action----------
TIMESTEP 8505 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.255117e+01
TIMESTEP 8506 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.254334e+01
TIMESTEP 8507 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265970e+01
TIMESTEP 8508 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268113e+01
----------Random Action----------
TIMESTEP 8509 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265167e+01
TIMESTEP 8510 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269064e+01
----------Random Action----------
TIMESTEP 8511 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274169e+01
TIMESTEP 8512 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 8582 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185834e+01
----------Random Action----------
TIMESTEP 8583 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195209e+01
----------Random Action----------
TIMESTEP 8584 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198761e+01
TIMESTEP 8585 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201960e+01
----------Random Action----------
TIMESTEP 8586 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205150e+01
TIMESTEP 8587 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200624e+01
----------Random Action----------
TIMESTEP 8588 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196771e+01
TIMESTEP 8589 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.200813e+01
----------Random Action----------
TIMESTEP 8590 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223197e+01
TIMESTEP 8591 / STATE observe

TIMESTEP 8662 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.280057e+01
----------Random Action----------
TIMESTEP 8663 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285737e+01
----------Random Action----------
TIMESTEP 8664 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.284391e+01
----------Random Action----------
TIMESTEP 8665 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.294659e+01
TIMESTEP 8666 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.302029e+01
TIMESTEP 8667 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.298405e+01
----------Random Action----------
TIMESTEP 8668 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.300417e+01
----------Random Action----------
TIMESTEP 8669 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.308427e+01
----------Random Action----------
TIMESTEP 8670 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.293729

TIMESTEP 8742 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213179e+01
----------Random Action----------
TIMESTEP 8743 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218229e+01
TIMESTEP 8744 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.202368e+01
TIMESTEP 8745 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.207211e+01
TIMESTEP 8746 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.210978e+01
TIMESTEP 8747 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.211785e+01
----------Random Action----------
TIMESTEP 8748 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211667e+01
----------Random Action----------
TIMESTEP 8749 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219915e+01
----------Random Action----------
TIMESTEP 8750 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216433e+01
TIMESTEP 8751 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 8821 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204853e+01
TIMESTEP 8822 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208442e+01
TIMESTEP 8823 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199454e+01
TIMESTEP 8824 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202578e+01
----------Random Action----------
TIMESTEP 8825 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217660e+01
TIMESTEP 8826 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229177e+01
TIMESTEP 8827 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240793e+01
----------Random Action----------
TIMESTEP 8828 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.246492e+01
----------Random Action----------
TIMESTEP 8829 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.263639e+01
----------Random Action----------
TIMESTEP 8830 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 8900 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.177072e+01
----------Random Action----------
TIMESTEP 8901 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.177453e+01
TIMESTEP 8902 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.168670e+01
----------Random Action----------
TIMESTEP 8903 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163770e+01
----------Random Action----------
TIMESTEP 8904 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.165047e+01
----------Random Action----------
TIMESTEP 8905 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.168289e+01
TIMESTEP 8906 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169142e+01
TIMESTEP 8907 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161610e+01
TIMESTEP 8908 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.165391e+01
----------Random Action----------
TIMESTEP 8909 / STATE observe

TIMESTEP 8977 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -1.305084e-01
----------Random Action----------
TIMESTEP 8978 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -7.344152e-01
----------Random Action----------
TIMESTEP 8979 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.699221e-02
----------Random Action----------
TIMESTEP 8980 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -2.936545e-03
----------Random Action----------
TIMESTEP 8981 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.073236e+00
----------Random Action----------
TIMESTEP 8982 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.866453e+00
----------Random Action----------
TIMESTEP 8983 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 9.625814e+00
TIMESTEP 8984 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 4.492393e+00
----------Random Action----------
TIMESTEP 8985 / STATE observe / EPSILON 0.5 / A

TIMESTEP 9056 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234825e+01
TIMESTEP 9057 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227881e+01
----------Random Action----------
TIMESTEP 9058 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233767e+01
TIMESTEP 9059 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216242e+01
----------Random Action----------
TIMESTEP 9060 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224597e+01
----------Random Action----------
TIMESTEP 9061 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232649e+01
----------Random Action----------
TIMESTEP 9062 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.242882e+01
----------Random Action----------
TIMESTEP 9063 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253163e+01
----------Random Action----------
TIMESTEP 9064 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.254346

TIMESTEP 9138 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.239979e+01
----------Random Action----------
TIMESTEP 9139 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230967e+01
----------Random Action----------
TIMESTEP 9140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237365e+01
TIMESTEP 9141 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.248435e+01
----------Random Action----------
TIMESTEP 9142 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251275e+01
----------Random Action----------
TIMESTEP 9143 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243886e+01
----------Random Action----------
TIMESTEP 9144 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244073e+01
TIMESTEP 9145 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.255646e+01
TIMESTEP 9146 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253641e+01
TIMESTEP 9147 / STATE observe

TIMESTEP 9217 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265796e+01
TIMESTEP 9218 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261366e+01
----------Random Action----------
TIMESTEP 9219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272281e+01
----------Random Action----------
TIMESTEP 9220 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.284087e+01
----------Random Action----------
TIMESTEP 9221 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.295268e+01
----------Random Action----------
TIMESTEP 9222 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 1.215075e+01
----------Random Action----------
TIMESTEP 9223 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.149276e+01
TIMESTEP 9224 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.162422e+01
----------Random Action----------
TIMESTEP 9225 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181882e+0

TIMESTEP 9296 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181640e+01
TIMESTEP 9297 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.180280e+01
TIMESTEP 9298 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188776e+01
TIMESTEP 9299 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188230e+01
----------Random Action----------
TIMESTEP 9300 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191707e+01
----------Random Action----------
TIMESTEP 9301 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192261e+01
TIMESTEP 9302 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198859e+01
----------Random Action----------
TIMESTEP 9303 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202761e+01
----------Random Action----------
TIMESTEP 9304 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192564e+01
----------Random Action----------
TIMESTEP 9305 / STATE observe

TIMESTEP 9377 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199178e+01
TIMESTEP 9378 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199481e+01
----------Random Action----------
TIMESTEP 9379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195540e+01
TIMESTEP 9380 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200842e+01
TIMESTEP 9381 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219485e+01
TIMESTEP 9382 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230138e+01
TIMESTEP 9383 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253238e+01
----------Random Action----------
TIMESTEP 9384 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268308e+01
----------Random Action----------
TIMESTEP 9385 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.265989e+01
----------Random Action----------
TIMESTEP 9386 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 9456 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213776e+01
----------Random Action----------
TIMESTEP 9457 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215421e+01
TIMESTEP 9458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212556e+01
----------Random Action----------
TIMESTEP 9459 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211893e+01
TIMESTEP 9460 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217733e+01
TIMESTEP 9461 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230584e+01
----------Random Action----------
TIMESTEP 9462 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231491e+01
TIMESTEP 9463 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239379e+01
TIMESTEP 9464 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247057e+01
TIMESTEP 9465 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239410e+01
TIMESTEP

TIMESTEP 9536 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241258e+01
TIMESTEP 9537 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249195e+01
----------Random Action----------
TIMESTEP 9538 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266063e+01
----------Random Action----------
TIMESTEP 9539 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.269134e+01
TIMESTEP 9540 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.290358e+01
TIMESTEP 9541 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.292463e+01
----------Random Action----------
TIMESTEP 9542 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.299802e+01
----------Random Action----------
TIMESTEP 9543 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270275e+01
----------Random Action----------
TIMESTEP 9544 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.128690e+01
TIMESTEP 9545 / STATE observe /

TIMESTEP 9615 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197078e+01
TIMESTEP 9616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199455e+01
----------Random Action----------
TIMESTEP 9617 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX 1.212030e+01
----------Random Action----------
TIMESTEP 9618 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.149727e+01
----------Random Action----------
TIMESTEP 9619 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.156147e+01
----------Random Action----------
TIMESTEP 9620 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161921e+01
TIMESTEP 9621 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.166770e+01
----------Random Action----------
TIMESTEP 9622 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.162973e+01
----------Random Action----------
TIMESTEP 9623 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161064e

TIMESTEP 9694 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228054e+01
----------Random Action----------
TIMESTEP 9695 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225188e+01
TIMESTEP 9696 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228745e+01
----------Random Action----------
TIMESTEP 9697 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234199e+01
TIMESTEP 9698 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227153e+01
----------Random Action----------
TIMESTEP 9699 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228505e+01
----------Random Action----------
TIMESTEP 9700 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229934e+01
----------Random Action----------
TIMESTEP 9701 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232844e+01
----------Random Action----------
TIMESTEP 9702 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232798

----------Random Action----------
TIMESTEP 9775 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -6.691815e+00
----------Random Action----------
TIMESTEP 9776 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -5.629031e+00
TIMESTEP 9777 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169509e+01
----------Random Action----------
TIMESTEP 9778 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179639e+01
TIMESTEP 9779 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.166663e+01
----------Random Action----------
TIMESTEP 9780 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.173999e+01
----------Random Action----------
TIMESTEP 9781 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.176433e+01
TIMESTEP 9782 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.178398e+01
TIMESTEP 9783 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182135e+01
TIMESTEP 9784 / STATE observ

TIMESTEP 9854 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241730e+01
----------Random Action----------
TIMESTEP 9855 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249609e+01
TIMESTEP 9856 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251312e+01
TIMESTEP 9857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249003e+01
TIMESTEP 9858 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238915e+01
----------Random Action----------
TIMESTEP 9859 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.243450e+01
----------Random Action----------
TIMESTEP 9860 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.220813e+01
----------Random Action----------
TIMESTEP 9861 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215398e+01
----------Random Action----------
TIMESTEP 9862 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212765e+01
TIMESTEP 9863 / STATE observe

TIMESTEP 9932 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265001e+01
TIMESTEP 9933 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269583e+01
----------Random Action----------
TIMESTEP 9934 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.282915e+01
TIMESTEP 9935 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.293793e+01
TIMESTEP 9936 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.302670e+01
----------Random Action----------
TIMESTEP 9937 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.303715e+01
TIMESTEP 9938 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.309839e+01
TIMESTEP 9939 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226996e+01
TIMESTEP 9940 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208942e+01
TIMESTEP 9941 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220735e+01
TIMESTEP 9942 / STATE observe / EPSILON 0.5 

----------Random Action----------
TIMESTEP 10016 / STATE explore / EPSILON 0.4999975005 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194238e+01
----------Random Action----------
TIMESTEP 10017 / STATE explore / EPSILON 0.499997333867 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207077e+01
----------Random Action----------
TIMESTEP 10018 / STATE explore / EPSILON 0.499997167233 / ACTION 1 / REWARD 0.1 / Q_MAX 1.189700e+01
----------Random Action----------
TIMESTEP 10019 / STATE explore / EPSILON 0.4999970006 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184191e+01
TIMESTEP 10020 / STATE explore / EPSILON 0.499996833967 / ACTION 0 / REWARD 0.1 / Q_MAX 1.175530e+01
----------Random Action----------
TIMESTEP 10021 / STATE explore / EPSILON 0.499996667333 / ACTION 0 / REWARD 0.1 / Q_MAX 1.176581e+01
TIMESTEP 10022 / STATE explore / EPSILON 0.4999965007 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174845e+01
----------Random Action----------
TIMESTEP 10023 / STATE explore / EPSILON 0.499996334067 / ACTION 0 / REWARD 0.1 / Q_MAX 1.17086

TIMESTEP 10088 / STATE explore / EPSILON 0.4999855029 / ACTION 0 / REWARD 0.1 / Q_MAX 1.137083e+01
TIMESTEP 10089 / STATE explore / EPSILON 0.499985336267 / ACTION 0 / REWARD 0.1 / Q_MAX 1.139009e+01
TIMESTEP 10090 / STATE explore / EPSILON 0.499985169633 / ACTION 0 / REWARD 0.1 / Q_MAX 1.160733e+01
TIMESTEP 10091 / STATE explore / EPSILON 0.499985003 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163844e+01
----------Random Action----------
TIMESTEP 10092 / STATE explore / EPSILON 0.499984836367 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161541e+01
TIMESTEP 10093 / STATE explore / EPSILON 0.499984669733 / ACTION 0 / REWARD 0.1 / Q_MAX 1.160318e+01
TIMESTEP 10094 / STATE explore / EPSILON 0.4999845031 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163305e+01
TIMESTEP 10095 / STATE explore / EPSILON 0.499984336467 / ACTION 0 / REWARD 0.1 / Q_MAX 1.162007e+01
----------Random Action----------
TIMESTEP 10096 / STATE explore / EPSILON 0.499984169833 / ACTION 0 / REWARD 0.1 / Q_MAX 1.154913e+01
----------Random Action-------

TIMESTEP 10162 / STATE explore / EPSILON 0.499973172033 / ACTION 0 / REWARD 0.1 / Q_MAX 1.153879e+01
TIMESTEP 10163 / STATE explore / EPSILON 0.4999730054 / ACTION 0 / REWARD 0.1 / Q_MAX 1.151434e+01
----------Random Action----------
TIMESTEP 10164 / STATE explore / EPSILON 0.499972838767 / ACTION 0 / REWARD 0.1 / Q_MAX 1.152806e+01
TIMESTEP 10165 / STATE explore / EPSILON 0.499972672133 / ACTION 0 / REWARD 0.1 / Q_MAX 1.160751e+01
----------Random Action----------
TIMESTEP 10166 / STATE explore / EPSILON 0.4999725055 / ACTION 0 / REWARD 0.1 / Q_MAX 1.169769e+01
TIMESTEP 10167 / STATE explore / EPSILON 0.499972338867 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174437e+01
TIMESTEP 10168 / STATE explore / EPSILON 0.499972172233 / ACTION 0 / REWARD 0.1 / Q_MAX 1.180858e+01
----------Random Action----------
TIMESTEP 10169 / STATE explore / EPSILON 0.4999720056 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183873e+01
----------Random Action----------
TIMESTEP 10170 / STATE explore / EPSILON 0.499971838967 / ACTI

TIMESTEP 10238 / STATE explore / EPSILON 0.4999605079 / ACTION 0 / REWARD 0.1 / Q_MAX 1.148735e+01
TIMESTEP 10239 / STATE explore / EPSILON 0.499960341267 / ACTION 0 / REWARD 0.1 / Q_MAX 1.156337e+01
TIMESTEP 10240 / STATE explore / EPSILON 0.499960174633 / ACTION 0 / REWARD 0.1 / Q_MAX 1.153189e+01
----------Random Action----------
TIMESTEP 10241 / STATE explore / EPSILON 0.499960008 / ACTION 0 / REWARD 0.1 / Q_MAX 1.157685e+01
----------Random Action----------
TIMESTEP 10242 / STATE explore / EPSILON 0.499959841367 / ACTION 0 / REWARD 0.1 / Q_MAX 1.148742e+01
----------Random Action----------
TIMESTEP 10243 / STATE explore / EPSILON 0.499959674733 / ACTION 0 / REWARD 0.1 / Q_MAX 1.149402e+01
TIMESTEP 10244 / STATE explore / EPSILON 0.4999595081 / ACTION 0 / REWARD 0.1 / Q_MAX 1.161950e+01
TIMESTEP 10245 / STATE explore / EPSILON 0.499959341467 / ACTION 0 / REWARD 0.1 / Q_MAX 1.163836e+01
----------Random Action----------
TIMESTEP 10246 / STATE explore / EPSILON 0.499959174833 / ACTIO

TIMESTEP 10312 / STATE explore / EPSILON 0.499948177033 / ACTION 0 / REWARD 0.1 / Q_MAX 1.144186e+01
TIMESTEP 10313 / STATE explore / EPSILON 0.4999480104 / ACTION 0 / REWARD 0.1 / Q_MAX 1.148363e+01
----------Random Action----------
TIMESTEP 10314 / STATE explore / EPSILON 0.499947843767 / ACTION 0 / REWARD 0.1 / Q_MAX 1.151051e+01
TIMESTEP 10315 / STATE explore / EPSILON 0.499947677133 / ACTION 0 / REWARD 0.1 / Q_MAX 1.147691e+01
TIMESTEP 10316 / STATE explore / EPSILON 0.4999475105 / ACTION 0 / REWARD 1 / Q_MAX 1.135367e+01
TIMESTEP 10317 / STATE explore / EPSILON 0.499947343867 / ACTION 0 / REWARD 0.1 / Q_MAX 1.060224e+01
----------Random Action----------
TIMESTEP 10318 / STATE explore / EPSILON 0.499947177233 / ACTION 0 / REWARD 0.1 / Q_MAX 1.034195e+01
----------Random Action----------
TIMESTEP 10319 / STATE explore / EPSILON 0.4999470106 / ACTION 0 / REWARD 0.1 / Q_MAX 1.040667e+01
TIMESTEP 10320 / STATE explore / EPSILON 0.499946843967 / ACTION 0 / REWARD 0.1 / Q_MAX 1.044410e+

TIMESTEP 10386 / STATE explore / EPSILON 0.499935846167 / ACTION 0 / REWARD 0.1 / Q_MAX 1.053856e+01
----------Random Action----------
TIMESTEP 10387 / STATE explore / EPSILON 0.499935679533 / ACTION 0 / REWARD 0.1 / Q_MAX 1.051979e+01
TIMESTEP 10388 / STATE explore / EPSILON 0.4999355129 / ACTION 0 / REWARD 0.1 / Q_MAX 1.055200e+01
TIMESTEP 10389 / STATE explore / EPSILON 0.499935346267 / ACTION 0 / REWARD 0.1 / Q_MAX 1.067793e+01
TIMESTEP 10390 / STATE explore / EPSILON 0.499935179633 / ACTION 0 / REWARD 0.1 / Q_MAX 1.076247e+01
TIMESTEP 10391 / STATE explore / EPSILON 0.499935013 / ACTION 0 / REWARD 0.1 / Q_MAX 1.063276e+01
TIMESTEP 10392 / STATE explore / EPSILON 0.499934846367 / ACTION 0 / REWARD 0.1 / Q_MAX 1.061618e+01
TIMESTEP 10393 / STATE explore / EPSILON 0.499934679733 / ACTION 0 / REWARD 0.1 / Q_MAX 1.060585e+01
TIMESTEP 10394 / STATE explore / EPSILON 0.4999345131 / ACTION 0 / REWARD 0.1 / Q_MAX 1.061247e+01
TIMESTEP 10395 / STATE explore / EPSILON 0.499934346467 / ACTION

TIMESTEP 10460 / STATE explore / EPSILON 0.4999235153 / ACTION 0 / REWARD 0.1 / Q_MAX 1.045705e+01
----------Random Action----------
TIMESTEP 10461 / STATE explore / EPSILON 0.499923348667 / ACTION 0 / REWARD 0.1 / Q_MAX 1.041926e+01
----------Random Action----------
TIMESTEP 10462 / STATE explore / EPSILON 0.499923182033 / ACTION 0 / REWARD 0.1 / Q_MAX 1.039477e+01
TIMESTEP 10463 / STATE explore / EPSILON 0.4999230154 / ACTION 1 / REWARD 0.1 / Q_MAX 1.048127e+01
----------Random Action----------
TIMESTEP 10464 / STATE explore / EPSILON 0.499922848767 / ACTION 0 / REWARD 0.1 / Q_MAX 1.031618e+01
TIMESTEP 10465 / STATE explore / EPSILON 0.499922682133 / ACTION 0 / REWARD 0.1 / Q_MAX 1.021465e+01
TIMESTEP 10466 / STATE explore / EPSILON 0.4999225155 / ACTION 0 / REWARD 0.1 / Q_MAX 1.014261e+01
----------Random Action----------
TIMESTEP 10467 / STATE explore / EPSILON 0.499922348867 / ACTION 0 / REWARD 0.1 / Q_MAX 1.025979e+01
TIMESTEP 10468 / STATE explore / EPSILON 0.499922182233 / ACTI

TIMESTEP 10532 / STATE explore / EPSILON 0.4999115177 / ACTION 0 / REWARD 0.1 / Q_MAX 1.059390e+01
----------Random Action----------
TIMESTEP 10533 / STATE explore / EPSILON 0.499911351067 / ACTION 0 / REWARD 0.1 / Q_MAX 1.057129e+01
TIMESTEP 10534 / STATE explore / EPSILON 0.499911184433 / ACTION 0 / REWARD 0.1 / Q_MAX 1.038112e+01
----------Random Action----------
TIMESTEP 10535 / STATE explore / EPSILON 0.4999110178 / ACTION 0 / REWARD 0.1 / Q_MAX 1.040349e+01
----------Random Action----------
TIMESTEP 10536 / STATE explore / EPSILON 0.499910851167 / ACTION 0 / REWARD 0.1 / Q_MAX 1.049756e+01
TIMESTEP 10537 / STATE explore / EPSILON 0.499910684533 / ACTION 0 / REWARD 0.1 / Q_MAX 1.054640e+01
TIMESTEP 10538 / STATE explore / EPSILON 0.4999105179 / ACTION 1 / REWARD 0.1 / Q_MAX 1.047508e+01
TIMESTEP 10539 / STATE explore / EPSILON 0.499910351267 / ACTION 1 / REWARD 0.1 / Q_MAX 1.049932e+01
----------Random Action----------
TIMESTEP 10540 / STATE explore / EPSILON 0.499910184633 / ACTI

TIMESTEP 10606 / STATE explore / EPSILON 0.499899186833 / ACTION 0 / REWARD 0.1 / Q_MAX 1.005903e+01
----------Random Action----------
TIMESTEP 10607 / STATE explore / EPSILON 0.4998990202 / ACTION 0 / REWARD 0.1 / Q_MAX 1.012794e+01
----------Random Action----------
TIMESTEP 10608 / STATE explore / EPSILON 0.499898853567 / ACTION 0 / REWARD 0.1 / Q_MAX 1.016406e+01
TIMESTEP 10609 / STATE explore / EPSILON 0.499898686933 / ACTION 0 / REWARD 0.1 / Q_MAX 1.029205e+01
----------Random Action----------
TIMESTEP 10610 / STATE explore / EPSILON 0.4998985203 / ACTION 0 / REWARD 0.1 / Q_MAX 1.035885e+01
TIMESTEP 10611 / STATE explore / EPSILON 0.499898353667 / ACTION 0 / REWARD 0.1 / Q_MAX 1.049366e+01
----------Random Action----------
TIMESTEP 10612 / STATE explore / EPSILON 0.499898187033 / ACTION 0 / REWARD 0.1 / Q_MAX 1.056892e+01
----------Random Action----------
TIMESTEP 10613 / STATE explore / EPSILON 0.4998980204 / ACTION 0 / REWARD 0.1 / Q_MAX 1.046923e+01
TIMESTEP 10614 / STATE explo

TIMESTEP 10678 / STATE explore / EPSILON 0.499887189233 / ACTION 0 / REWARD 0.1 / Q_MAX 9.549273e+00
----------Random Action----------
TIMESTEP 10679 / STATE explore / EPSILON 0.4998870226 / ACTION 0 / REWARD 0.1 / Q_MAX 9.528510e+00
----------Random Action----------
TIMESTEP 10680 / STATE explore / EPSILON 0.499886855967 / ACTION 0 / REWARD 0.1 / Q_MAX 9.489556e+00
TIMESTEP 10681 / STATE explore / EPSILON 0.499886689333 / ACTION 0 / REWARD 0.1 / Q_MAX 9.502173e+00
----------Random Action----------
TIMESTEP 10682 / STATE explore / EPSILON 0.4998865227 / ACTION 0 / REWARD 0.1 / Q_MAX 9.416631e+00
----------Random Action----------
TIMESTEP 10683 / STATE explore / EPSILON 0.499886356067 / ACTION 0 / REWARD 0.1 / Q_MAX 9.355603e+00
TIMESTEP 10684 / STATE explore / EPSILON 0.499886189433 / ACTION 0 / REWARD 0.1 / Q_MAX 9.465264e+00
TIMESTEP 10685 / STATE explore / EPSILON 0.4998860228 / ACTION 0 / REWARD 0.1 / Q_MAX 9.534941e+00
----------Random Action----------
TIMESTEP 10686 / STATE explo

TIMESTEP 10752 / STATE explore / EPSILON 0.499874858367 / ACTION 0 / REWARD 0.1 / Q_MAX 9.542760e+00
----------Random Action----------
TIMESTEP 10753 / STATE explore / EPSILON 0.499874691733 / ACTION 0 / REWARD 0.1 / Q_MAX 9.513503e+00
----------Random Action----------
TIMESTEP 10754 / STATE explore / EPSILON 0.4998745251 / ACTION 0 / REWARD 0.1 / Q_MAX 9.428892e+00
TIMESTEP 10755 / STATE explore / EPSILON 0.499874358467 / ACTION 0 / REWARD 0.1 / Q_MAX 9.462823e+00
TIMESTEP 10756 / STATE explore / EPSILON 0.499874191833 / ACTION 0 / REWARD 0.1 / Q_MAX 9.601063e+00
----------Random Action----------
TIMESTEP 10757 / STATE explore / EPSILON 0.4998740252 / ACTION 0 / REWARD 0.1 / Q_MAX 9.590297e+00
----------Random Action----------
TIMESTEP 10758 / STATE explore / EPSILON 0.499873858567 / ACTION 0 / REWARD 0.1 / Q_MAX 9.607892e+00
----------Random Action----------
TIMESTEP 10759 / STATE explore / EPSILON 0.499873691933 / ACTION 0 / REWARD 0.1 / Q_MAX 9.579178e+00
----------Random Action---

TIMESTEP 10820 / STATE explore / EPSILON 0.4998635273 / ACTION 0 / REWARD 0.1 / Q_MAX 9.541696e+00
----------Random Action----------
TIMESTEP 10821 / STATE explore / EPSILON 0.499863360667 / ACTION 0 / REWARD 0.1 / Q_MAX 9.473106e+00
----------Random Action----------
TIMESTEP 10822 / STATE explore / EPSILON 0.499863194033 / ACTION 0 / REWARD 0.1 / Q_MAX 9.409056e+00
----------Random Action----------
TIMESTEP 10823 / STATE explore / EPSILON 0.4998630274 / ACTION 0 / REWARD 0.1 / Q_MAX 9.538079e+00
----------Random Action----------
TIMESTEP 10824 / STATE explore / EPSILON 0.499862860767 / ACTION 0 / REWARD 0.1 / Q_MAX 9.812362e+00
----------Random Action----------
TIMESTEP 10825 / STATE explore / EPSILON 0.499862694133 / ACTION 0 / REWARD 0.1 / Q_MAX 9.865720e+00
TIMESTEP 10826 / STATE explore / EPSILON 0.4998625275 / ACTION 0 / REWARD 0.1 / Q_MAX 9.772633e+00
----------Random Action----------
TIMESTEP 10827 / STATE explore / EPSILON 0.499862360867 / ACTION 0 / REWARD -1 / Q_MAX 9.450275

TIMESTEP 10892 / STATE explore / EPSILON 0.4998515297 / ACTION 0 / REWARD 0.1 / Q_MAX 1.019409e+01
TIMESTEP 10893 / STATE explore / EPSILON 0.499851363067 / ACTION 0 / REWARD 0.1 / Q_MAX 1.007917e+01
TIMESTEP 10894 / STATE explore / EPSILON 0.499851196433 / ACTION 0 / REWARD 0.1 / Q_MAX 9.989830e+00
TIMESTEP 10895 / STATE explore / EPSILON 0.4998510298 / ACTION 0 / REWARD 0.1 / Q_MAX 9.963007e+00
TIMESTEP 10896 / STATE explore / EPSILON 0.499850863167 / ACTION 1 / REWARD 0.1 / Q_MAX 9.933644e+00
TIMESTEP 10897 / STATE explore / EPSILON 0.499850696533 / ACTION 0 / REWARD 0.1 / Q_MAX 9.804156e+00
----------Random Action----------
TIMESTEP 10898 / STATE explore / EPSILON 0.4998505299 / ACTION 0 / REWARD 0.1 / Q_MAX 9.775188e+00
----------Random Action----------
TIMESTEP 10899 / STATE explore / EPSILON 0.499850363267 / ACTION 0 / REWARD 0.1 / Q_MAX 9.901642e+00
TIMESTEP 10900 / STATE explore / EPSILON 0.499850196633 / ACTION 0 / REWARD 0.1 / Q_MAX 9.869005e+00
TIMESTEP 10901 / STATE explor

TIMESTEP 10967 / STATE explore / EPSILON 0.4998390322 / ACTION 0 / REWARD 0.1 / Q_MAX 9.540462e+00
----------Random Action----------
TIMESTEP 10968 / STATE explore / EPSILON 0.499838865567 / ACTION 0 / REWARD 0.1 / Q_MAX 9.604511e+00
TIMESTEP 10969 / STATE explore / EPSILON 0.499838698933 / ACTION 0 / REWARD 0.1 / Q_MAX 9.429012e+00
TIMESTEP 10970 / STATE explore / EPSILON 0.4998385323 / ACTION 0 / REWARD 0.1 / Q_MAX 9.342548e+00
TIMESTEP 10971 / STATE explore / EPSILON 0.499838365667 / ACTION 0 / REWARD 0.1 / Q_MAX 9.296921e+00
TIMESTEP 10972 / STATE explore / EPSILON 0.499838199033 / ACTION 0 / REWARD 0.1 / Q_MAX 9.333774e+00
TIMESTEP 10973 / STATE explore / EPSILON 0.4998380324 / ACTION 0 / REWARD 0.1 / Q_MAX 9.212236e+00
----------Random Action----------
TIMESTEP 10974 / STATE explore / EPSILON 0.499837865767 / ACTION 0 / REWARD 0.1 / Q_MAX 9.271716e+00
----------Random Action----------
TIMESTEP 10975 / STATE explore / EPSILON 0.499837699133 / ACTION 0 / REWARD 0.1 / Q_MAX 9.485410

TIMESTEP 11040 / STATE explore / EPSILON 0.499826867967 / ACTION 1 / REWARD 0.1 / Q_MAX 9.727133e+00
----------Random Action----------
TIMESTEP 11041 / STATE explore / EPSILON 0.499826701333 / ACTION 0 / REWARD 0.1 / Q_MAX 9.708785e+00
TIMESTEP 11042 / STATE explore / EPSILON 0.4998265347 / ACTION 1 / REWARD 0.1 / Q_MAX 9.800988e+00
TIMESTEP 11043 / STATE explore / EPSILON 0.499826368067 / ACTION 1 / REWARD 0.1 / Q_MAX 9.776150e+00
TIMESTEP 11044 / STATE explore / EPSILON 0.499826201433 / ACTION 1 / REWARD 0.1 / Q_MAX 9.879904e+00
----------Random Action----------
TIMESTEP 11045 / STATE explore / EPSILON 0.4998260348 / ACTION 0 / REWARD 0.1 / Q_MAX 9.904032e+00
TIMESTEP 11046 / STATE explore / EPSILON 0.499825868167 / ACTION 1 / REWARD 0.1 / Q_MAX 9.953754e+00
----------Random Action----------
TIMESTEP 11047 / STATE explore / EPSILON 0.499825701533 / ACTION 0 / REWARD 0.1 / Q_MAX 1.000327e+01
----------Random Action----------
TIMESTEP 11048 / STATE explore / EPSILON 0.4998255349 / ACTI

TIMESTEP 11113 / STATE explore / EPSILON 0.499814703733 / ACTION 1 / REWARD 0.1 / Q_MAX 9.379531e+00
TIMESTEP 11114 / STATE explore / EPSILON 0.4998145371 / ACTION 1 / REWARD 0.1 / Q_MAX 9.382546e+00
----------Random Action----------
TIMESTEP 11115 / STATE explore / EPSILON 0.499814370467 / ACTION 0 / REWARD 0.1 / Q_MAX 9.333838e+00
----------Random Action----------
TIMESTEP 11116 / STATE explore / EPSILON 0.499814203833 / ACTION 0 / REWARD 0.1 / Q_MAX 9.346933e+00
TIMESTEP 11117 / STATE explore / EPSILON 0.4998140372 / ACTION 1 / REWARD 0.1 / Q_MAX 9.255980e+00
----------Random Action----------
TIMESTEP 11118 / STATE explore / EPSILON 0.499813870567 / ACTION 1 / REWARD 0.1 / Q_MAX 9.322326e+00
TIMESTEP 11119 / STATE explore / EPSILON 0.499813703933 / ACTION 1 / REWARD 0.1 / Q_MAX 9.097645e+00
----------Random Action----------
TIMESTEP 11120 / STATE explore / EPSILON 0.4998135373 / ACTION 0 / REWARD 0.1 / Q_MAX 9.075093e+00
----------Random Action----------
TIMESTEP 11121 / STATE explo

TIMESTEP 11186 / STATE explore / EPSILON 0.4998025395 / ACTION 0 / REWARD 0.1 / Q_MAX 9.444063e+00
TIMESTEP 11187 / STATE explore / EPSILON 0.499802372867 / ACTION 0 / REWARD 0.1 / Q_MAX 9.564533e+00
TIMESTEP 11188 / STATE explore / EPSILON 0.499802206233 / ACTION 0 / REWARD 0.1 / Q_MAX 9.538958e+00
----------Random Action----------
TIMESTEP 11189 / STATE explore / EPSILON 0.4998020396 / ACTION 0 / REWARD 0.1 / Q_MAX 9.453431e+00
TIMESTEP 11190 / STATE explore / EPSILON 0.499801872967 / ACTION 0 / REWARD 0.1 / Q_MAX 9.384041e+00
TIMESTEP 11191 / STATE explore / EPSILON 0.499801706333 / ACTION 0 / REWARD 0.1 / Q_MAX 9.308790e+00
TIMESTEP 11192 / STATE explore / EPSILON 0.4998015397 / ACTION 0 / REWARD 0.1 / Q_MAX 9.280463e+00
----------Random Action----------
TIMESTEP 11193 / STATE explore / EPSILON 0.499801373067 / ACTION 0 / REWARD 0.1 / Q_MAX 9.311832e+00
TIMESTEP 11194 / STATE explore / EPSILON 0.499801206433 / ACTION 1 / REWARD 0.1 / Q_MAX 9.240400e+00
----------Random Action------

TIMESTEP 11260 / STATE explore / EPSILON 0.499790208633 / ACTION 0 / REWARD 0.1 / Q_MAX 9.060925e+00
TIMESTEP 11261 / STATE explore / EPSILON 0.499790042 / ACTION 0 / REWARD 0.1 / Q_MAX 9.107299e+00
TIMESTEP 11262 / STATE explore / EPSILON 0.499789875367 / ACTION 0 / REWARD 0.1 / Q_MAX 9.087629e+00
----------Random Action----------
TIMESTEP 11263 / STATE explore / EPSILON 0.499789708733 / ACTION 0 / REWARD 1 / Q_MAX 9.095775e+00
TIMESTEP 11264 / STATE explore / EPSILON 0.4997895421 / ACTION 0 / REWARD 0.1 / Q_MAX 8.341633e+00
----------Random Action----------
TIMESTEP 11265 / STATE explore / EPSILON 0.499789375467 / ACTION 0 / REWARD 0.1 / Q_MAX 8.196686e+00
TIMESTEP 11266 / STATE explore / EPSILON 0.499789208833 / ACTION 0 / REWARD 0.1 / Q_MAX 8.200736e+00
TIMESTEP 11267 / STATE explore / EPSILON 0.4997890422 / ACTION 0 / REWARD 0.1 / Q_MAX 8.532312e+00
----------Random Action----------
TIMESTEP 11268 / STATE explore / EPSILON 0.499788875567 / ACTION 0 / REWARD 0.1 / Q_MAX 8.628260e+0

----------Random Action----------
TIMESTEP 11333 / STATE explore / EPSILON 0.4997780444 / ACTION 0 / REWARD 0.1 / Q_MAX 9.533182e+00
----------Random Action----------
TIMESTEP 11334 / STATE explore / EPSILON 0.499777877767 / ACTION 0 / REWARD 0.1 / Q_MAX 9.545402e+00
TIMESTEP 11335 / STATE explore / EPSILON 0.499777711133 / ACTION 0 / REWARD 0.1 / Q_MAX 9.635053e+00
----------Random Action----------
TIMESTEP 11336 / STATE explore / EPSILON 0.4997775445 / ACTION 0 / REWARD 0.1 / Q_MAX 9.685685e+00
TIMESTEP 11337 / STATE explore / EPSILON 0.499777377867 / ACTION 0 / REWARD 0.1 / Q_MAX 9.841408e+00
TIMESTEP 11338 / STATE explore / EPSILON 0.499777211233 / ACTION 0 / REWARD 0.1 / Q_MAX 9.849789e+00
TIMESTEP 11339 / STATE explore / EPSILON 0.4997770446 / ACTION 0 / REWARD 0.1 / Q_MAX 9.751534e+00
----------Random Action----------
TIMESTEP 11340 / STATE explore / EPSILON 0.499776877967 / ACTION 0 / REWARD 0.1 / Q_MAX 9.734254e+00
TIMESTEP 11341 / STATE explore / EPSILON 0.499776711333 / ACTI

TIMESTEP 11406 / STATE explore / EPSILON 0.499765880167 / ACTION 0 / REWARD 0.1 / Q_MAX 9.484728e+00
----------Random Action----------
TIMESTEP 11407 / STATE explore / EPSILON 0.499765713533 / ACTION 0 / REWARD 0.1 / Q_MAX 9.409274e+00
----------Random Action----------
TIMESTEP 11408 / STATE explore / EPSILON 0.4997655469 / ACTION 0 / REWARD 0.1 / Q_MAX 9.397007e+00
----------Random Action----------
TIMESTEP 11409 / STATE explore / EPSILON 0.499765380267 / ACTION 0 / REWARD 0.1 / Q_MAX 9.408240e+00
----------Random Action----------
TIMESTEP 11410 / STATE explore / EPSILON 0.499765213633 / ACTION 0 / REWARD 0.1 / Q_MAX 9.337401e+00
----------Random Action----------
TIMESTEP 11411 / STATE explore / EPSILON 0.499765047 / ACTION 0 / REWARD 0.1 / Q_MAX 9.312557e+00
TIMESTEP 11412 / STATE explore / EPSILON 0.499764880367 / ACTION 0 / REWARD 0.1 / Q_MAX 9.301778e+00
----------Random Action----------
TIMESTEP 11413 / STATE explore / EPSILON 0.499764713733 / ACTION 0 / REWARD 0.1 / Q_MAX 9.3121

TIMESTEP 11477 / STATE explore / EPSILON 0.4997540492 / ACTION 0 / REWARD 0.1 / Q_MAX 9.733635e+00
----------Random Action----------
TIMESTEP 11478 / STATE explore / EPSILON 0.499753882567 / ACTION 0 / REWARD 0.1 / Q_MAX 9.914915e+00
TIMESTEP 11479 / STATE explore / EPSILON 0.499753715933 / ACTION 0 / REWARD 0.1 / Q_MAX 9.786407e+00
TIMESTEP 11480 / STATE explore / EPSILON 0.4997535493 / ACTION 0 / REWARD 0.1 / Q_MAX 9.837244e+00
----------Random Action----------
TIMESTEP 11481 / STATE explore / EPSILON 0.499753382667 / ACTION 0 / REWARD 0.1 / Q_MAX 9.813766e+00
TIMESTEP 11482 / STATE explore / EPSILON 0.499753216033 / ACTION 0 / REWARD 0.1 / Q_MAX 9.699970e+00
----------Random Action----------
TIMESTEP 11483 / STATE explore / EPSILON 0.4997530494 / ACTION 0 / REWARD 0.1 / Q_MAX 9.705447e+00
----------Random Action----------
TIMESTEP 11484 / STATE explore / EPSILON 0.499752882767 / ACTION 0 / REWARD 0.1 / Q_MAX 9.573068e+00
TIMESTEP 11485 / STATE explore / EPSILON 0.499752716133 / ACTI

TIMESTEP 11550 / STATE explore / EPSILON 0.499741884967 / ACTION 0 / REWARD 0.1 / Q_MAX 9.380136e+00
TIMESTEP 11551 / STATE explore / EPSILON 0.499741718333 / ACTION 0 / REWARD 0.1 / Q_MAX 9.286753e+00
----------Random Action----------
TIMESTEP 11552 / STATE explore / EPSILON 0.4997415517 / ACTION 0 / REWARD 0.1 / Q_MAX 9.292871e+00
TIMESTEP 11553 / STATE explore / EPSILON 0.499741385067 / ACTION 0 / REWARD 0.1 / Q_MAX 9.337426e+00
TIMESTEP 11554 / STATE explore / EPSILON 0.499741218433 / ACTION 0 / REWARD 0.1 / Q_MAX 9.345167e+00
TIMESTEP 11555 / STATE explore / EPSILON 0.4997410518 / ACTION 0 / REWARD 0.1 / Q_MAX 9.404081e+00
TIMESTEP 11556 / STATE explore / EPSILON 0.499740885167 / ACTION 0 / REWARD 0.1 / Q_MAX 9.356359e+00
TIMESTEP 11557 / STATE explore / EPSILON 0.499740718533 / ACTION 0 / REWARD 0.1 / Q_MAX 9.407309e+00
----------Random Action----------
TIMESTEP 11558 / STATE explore / EPSILON 0.4997405519 / ACTION 0 / REWARD 0.1 / Q_MAX 9.454782e+00
TIMESTEP 11559 / STATE explor

TIMESTEP 11624 / STATE explore / EPSILON 0.4997295541 / ACTION 1 / REWARD 0.1 / Q_MAX 9.172119e+00
----------Random Action----------
TIMESTEP 11625 / STATE explore / EPSILON 0.499729387467 / ACTION 0 / REWARD 0.1 / Q_MAX 8.228624e+00
TIMESTEP 11626 / STATE explore / EPSILON 0.499729220833 / ACTION 0 / REWARD 0.1 / Q_MAX 5.112703e+00
----------Random Action----------
TIMESTEP 11627 / STATE explore / EPSILON 0.4997290542 / ACTION 0 / REWARD 0.1 / Q_MAX -4.682132e+00
----------Random Action----------
TIMESTEP 11628 / STATE explore / EPSILON 0.499728887567 / ACTION 0 / REWARD -1 / Q_MAX -5.247371e+00
TIMESTEP 11629 / STATE explore / EPSILON 0.499728720933 / ACTION 0 / REWARD 0.1 / Q_MAX 9.533152e+00
TIMESTEP 11630 / STATE explore / EPSILON 0.4997285543 / ACTION 0 / REWARD 0.1 / Q_MAX 9.711420e+00
TIMESTEP 11631 / STATE explore / EPSILON 0.499728387667 / ACTION 0 / REWARD 0.1 / Q_MAX 9.605094e+00
----------Random Action----------
TIMESTEP 11632 / STATE explore / EPSILON 0.499728221033 / ACT

TIMESTEP 11698 / STATE explore / EPSILON 0.499717223233 / ACTION 0 / REWARD 0.1 / Q_MAX 9.838905e+00
TIMESTEP 11699 / STATE explore / EPSILON 0.4997170566 / ACTION 0 / REWARD 0.1 / Q_MAX 9.689032e+00
TIMESTEP 11700 / STATE explore / EPSILON 0.499716889967 / ACTION 0 / REWARD 0.1 / Q_MAX 9.587113e+00
TIMESTEP 11701 / STATE explore / EPSILON 0.499716723333 / ACTION 0 / REWARD 0.1 / Q_MAX 9.631574e+00
----------Random Action----------
TIMESTEP 11702 / STATE explore / EPSILON 0.4997165567 / ACTION 0 / REWARD 0.1 / Q_MAX 9.526945e+00
----------Random Action----------
TIMESTEP 11703 / STATE explore / EPSILON 0.499716390067 / ACTION 0 / REWARD 0.1 / Q_MAX 9.616035e+00
TIMESTEP 11704 / STATE explore / EPSILON 0.499716223433 / ACTION 0 / REWARD 0.1 / Q_MAX 9.607828e+00
----------Random Action----------
TIMESTEP 11705 / STATE explore / EPSILON 0.4997160568 / ACTION 0 / REWARD 0.1 / Q_MAX 9.666358e+00
TIMESTEP 11706 / STATE explore / EPSILON 0.499715890167 / ACTION 0 / REWARD 0.1 / Q_MAX 9.616796

TIMESTEP 11772 / STATE explore / EPSILON 0.499704892367 / ACTION 0 / REWARD 0.1 / Q_MAX 7.447281e+00
TIMESTEP 11773 / STATE explore / EPSILON 0.499704725733 / ACTION 0 / REWARD 0.1 / Q_MAX 8.232226e+00
----------Random Action----------
TIMESTEP 11774 / STATE explore / EPSILON 0.4997045591 / ACTION 0 / REWARD 1 / Q_MAX 8.441842e+00
----------Random Action----------
TIMESTEP 11775 / STATE explore / EPSILON 0.499704392467 / ACTION 0 / REWARD 0.1 / Q_MAX 7.685613e+00
TIMESTEP 11776 / STATE explore / EPSILON 0.499704225833 / ACTION 0 / REWARD 0.1 / Q_MAX 7.577349e+00
TIMESTEP 11777 / STATE explore / EPSILON 0.4997040592 / ACTION 0 / REWARD 0.1 / Q_MAX 7.720183e+00
----------Random Action----------
TIMESTEP 11778 / STATE explore / EPSILON 0.499703892567 / ACTION 0 / REWARD 0.1 / Q_MAX 7.966664e+00
----------Random Action----------
TIMESTEP 11779 / STATE explore / EPSILON 0.499703725933 / ACTION 0 / REWARD 0.1 / Q_MAX 8.057782e+00
TIMESTEP 11780 / STATE explore / EPSILON 0.4997035593 / ACTION

TIMESTEP 11846 / STATE explore / EPSILON 0.4996925615 / ACTION 1 / REWARD 0.1 / Q_MAX 8.849188e+00
----------Random Action----------
TIMESTEP 11847 / STATE explore / EPSILON 0.499692394867 / ACTION 1 / REWARD 0.1 / Q_MAX 8.733484e+00
TIMESTEP 11848 / STATE explore / EPSILON 0.499692228233 / ACTION 1 / REWARD 0.1 / Q_MAX 8.880325e+00
TIMESTEP 11849 / STATE explore / EPSILON 0.4996920616 / ACTION 1 / REWARD 0.1 / Q_MAX 8.984250e+00
TIMESTEP 11850 / STATE explore / EPSILON 0.499691894967 / ACTION 0 / REWARD 0.1 / Q_MAX 9.067121e+00
TIMESTEP 11851 / STATE explore / EPSILON 0.499691728333 / ACTION 0 / REWARD 0.1 / Q_MAX 9.196303e+00
----------Random Action----------
TIMESTEP 11852 / STATE explore / EPSILON 0.4996915617 / ACTION 0 / REWARD 0.1 / Q_MAX 9.231471e+00
----------Random Action----------
TIMESTEP 11853 / STATE explore / EPSILON 0.499691395067 / ACTION 0 / REWARD 0.1 / Q_MAX 9.219416e+00
TIMESTEP 11854 / STATE explore / EPSILON 0.499691228433 / ACTION 0 / REWARD 0.1 / Q_MAX 9.175116

TIMESTEP 11919 / STATE explore / EPSILON 0.499680397267 / ACTION 0 / REWARD 0.1 / Q_MAX 9.238986e+00
----------Random Action----------
TIMESTEP 11920 / STATE explore / EPSILON 0.499680230633 / ACTION 0 / REWARD 0.1 / Q_MAX 9.154938e+00
----------Random Action----------
TIMESTEP 11921 / STATE explore / EPSILON 0.499680064 / ACTION 0 / REWARD 0.1 / Q_MAX 9.176527e+00
TIMESTEP 11922 / STATE explore / EPSILON 0.499679897367 / ACTION 0 / REWARD 0.1 / Q_MAX 9.114064e+00
----------Random Action----------
TIMESTEP 11923 / STATE explore / EPSILON 0.499679730733 / ACTION 1 / REWARD 0.1 / Q_MAX 9.228459e+00
TIMESTEP 11924 / STATE explore / EPSILON 0.4996795641 / ACTION 0 / REWARD 0.1 / Q_MAX 9.029815e+00
TIMESTEP 11925 / STATE explore / EPSILON 0.499679397467 / ACTION 0 / REWARD 0.1 / Q_MAX 8.894695e+00
TIMESTEP 11926 / STATE explore / EPSILON 0.499679230833 / ACTION 0 / REWARD 0.1 / Q_MAX 8.775770e+00
----------Random Action----------
TIMESTEP 11927 / STATE explore / EPSILON 0.4996790642 / ACTIO

----------Random Action----------
TIMESTEP 11992 / STATE explore / EPSILON 0.499668233033 / ACTION 0 / REWARD 0.1 / Q_MAX 7.692490e+00
TIMESTEP 11993 / STATE explore / EPSILON 0.4996680664 / ACTION 0 / REWARD 0.1 / Q_MAX 7.793612e+00
TIMESTEP 11994 / STATE explore / EPSILON 0.499667899767 / ACTION 0 / REWARD 0.1 / Q_MAX 7.895875e+00
----------Random Action----------
TIMESTEP 11995 / STATE explore / EPSILON 0.499667733133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.953825e+00
TIMESTEP 11996 / STATE explore / EPSILON 0.4996675665 / ACTION 0 / REWARD 0.1 / Q_MAX 8.034584e+00
TIMESTEP 11997 / STATE explore / EPSILON 0.499667399867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.936477e+00
TIMESTEP 11998 / STATE explore / EPSILON 0.499667233233 / ACTION 0 / REWARD 0.1 / Q_MAX 7.864903e+00
TIMESTEP 11999 / STATE explore / EPSILON 0.4996670666 / ACTION 0 / REWARD 0.1 / Q_MAX 7.933800e+00
----------Random Action----------
TIMESTEP 12000 / STATE explore / EPSILON 0.499666899967 / ACTION 0 / REWARD 0.1 / Q_MAX 8.107986

TIMESTEP 12067 / STATE explore / EPSILON 0.499655735533 / ACTION 0 / REWARD 0.1 / Q_MAX 7.902432e+00
TIMESTEP 12068 / STATE explore / EPSILON 0.4996555689 / ACTION 0 / REWARD 0.1 / Q_MAX 7.961653e+00
----------Random Action----------
TIMESTEP 12069 / STATE explore / EPSILON 0.499655402267 / ACTION 0 / REWARD 0.1 / Q_MAX 8.006903e+00
TIMESTEP 12070 / STATE explore / EPSILON 0.499655235633 / ACTION 0 / REWARD 0.1 / Q_MAX 8.028162e+00
TIMESTEP 12071 / STATE explore / EPSILON 0.499655069 / ACTION 1 / REWARD 0.1 / Q_MAX 7.891852e+00
----------Random Action----------
TIMESTEP 12072 / STATE explore / EPSILON 0.499654902367 / ACTION 0 / REWARD 0.1 / Q_MAX 7.919794e+00
----------Random Action----------
TIMESTEP 12073 / STATE explore / EPSILON 0.499654735733 / ACTION 0 / REWARD 0.1 / Q_MAX 7.702316e+00
----------Random Action----------
TIMESTEP 12074 / STATE explore / EPSILON 0.4996545691 / ACTION 0 / REWARD 0.1 / Q_MAX 7.263402e+00
----------Random Action----------
TIMESTEP 12075 / STATE explor

----------Random Action----------
TIMESTEP 12141 / STATE explore / EPSILON 0.499643404667 / ACTION 0 / REWARD 0.1 / Q_MAX 8.418542e+00
TIMESTEP 12142 / STATE explore / EPSILON 0.499643238033 / ACTION 0 / REWARD 0.1 / Q_MAX 8.335519e+00
TIMESTEP 12143 / STATE explore / EPSILON 0.4996430714 / ACTION 0 / REWARD 0.1 / Q_MAX 8.193831e+00
TIMESTEP 12144 / STATE explore / EPSILON 0.499642904767 / ACTION 1 / REWARD 0.1 / Q_MAX 8.050090e+00
----------Random Action----------
TIMESTEP 12145 / STATE explore / EPSILON 0.499642738133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.862170e+00
TIMESTEP 12146 / STATE explore / EPSILON 0.4996425715 / ACTION 0 / REWARD 0.1 / Q_MAX 7.828358e+00
TIMESTEP 12147 / STATE explore / EPSILON 0.499642404867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.883055e+00
----------Random Action----------
TIMESTEP 12148 / STATE explore / EPSILON 0.499642238233 / ACTION 0 / REWARD 0.1 / Q_MAX 7.851366e+00
----------Random Action----------
TIMESTEP 12149 / STATE explore / EPSILON 0.4996420716 / ACTI

TIMESTEP 12214 / STATE explore / EPSILON 0.499631240433 / ACTION 0 / REWARD 0.1 / Q_MAX 8.353026e+00
----------Random Action----------
TIMESTEP 12215 / STATE explore / EPSILON 0.4996310738 / ACTION 0 / REWARD 0.1 / Q_MAX 8.330811e+00
----------Random Action----------
TIMESTEP 12216 / STATE explore / EPSILON 0.499630907167 / ACTION 0 / REWARD 0.1 / Q_MAX 8.397731e+00
TIMESTEP 12217 / STATE explore / EPSILON 0.499630740533 / ACTION 0 / REWARD 0.1 / Q_MAX 8.350965e+00
----------Random Action----------
TIMESTEP 12218 / STATE explore / EPSILON 0.4996305739 / ACTION 0 / REWARD 0.1 / Q_MAX 8.549487e+00
----------Random Action----------
TIMESTEP 12219 / STATE explore / EPSILON 0.499630407267 / ACTION 0 / REWARD 0.1 / Q_MAX 8.706181e+00
TIMESTEP 12220 / STATE explore / EPSILON 0.499630240633 / ACTION 1 / REWARD 0.1 / Q_MAX 8.736917e+00
----------Random Action----------
TIMESTEP 12221 / STATE explore / EPSILON 0.499630074 / ACTION 0 / REWARD 0.1 / Q_MAX 8.569011e+00
----------Random Action------

TIMESTEP 12283 / STATE explore / EPSILON 0.499619742733 / ACTION 1 / REWARD 0.1 / Q_MAX 8.050606e+00
TIMESTEP 12284 / STATE explore / EPSILON 0.4996195761 / ACTION 1 / REWARD 0.1 / Q_MAX 8.068865e+00
TIMESTEP 12285 / STATE explore / EPSILON 0.499619409467 / ACTION 1 / REWARD 0.1 / Q_MAX 7.955486e+00
----------Random Action----------
TIMESTEP 12286 / STATE explore / EPSILON 0.499619242833 / ACTION 0 / REWARD 0.1 / Q_MAX 7.770183e+00
----------Random Action----------
TIMESTEP 12287 / STATE explore / EPSILON 0.4996190762 / ACTION 0 / REWARD 0.1 / Q_MAX 7.856215e+00
----------Random Action----------
TIMESTEP 12288 / STATE explore / EPSILON 0.499618909567 / ACTION 0 / REWARD 0.1 / Q_MAX 7.829394e+00
TIMESTEP 12289 / STATE explore / EPSILON 0.499618742933 / ACTION 1 / REWARD 0.1 / Q_MAX 7.702341e+00
TIMESTEP 12290 / STATE explore / EPSILON 0.4996185763 / ACTION 1 / REWARD 0.1 / Q_MAX 7.764350e+00
TIMESTEP 12291 / STATE explore / EPSILON 0.499618409667 / ACTION 1 / REWARD 0.1 / Q_MAX 7.406396

----------Random Action----------
TIMESTEP 12357 / STATE explore / EPSILON 0.499607411867 / ACTION 0 / REWARD 0.1 / Q_MAX 8.442167e+00
TIMESTEP 12358 / STATE explore / EPSILON 0.499607245233 / ACTION 0 / REWARD 0.1 / Q_MAX 8.720217e+00
----------Random Action----------
TIMESTEP 12359 / STATE explore / EPSILON 0.4996070786 / ACTION 0 / REWARD 0.1 / Q_MAX 9.012847e+00
TIMESTEP 12360 / STATE explore / EPSILON 0.499606911967 / ACTION 0 / REWARD 0.1 / Q_MAX 8.999302e+00
----------Random Action----------
TIMESTEP 12361 / STATE explore / EPSILON 0.499606745333 / ACTION 0 / REWARD 0.1 / Q_MAX 8.936246e+00
----------Random Action----------
TIMESTEP 12362 / STATE explore / EPSILON 0.4996065787 / ACTION 0 / REWARD 0.1 / Q_MAX 8.815606e+00
TIMESTEP 12363 / STATE explore / EPSILON 0.499606412067 / ACTION 0 / REWARD 0.1 / Q_MAX 8.825075e+00
TIMESTEP 12364 / STATE explore / EPSILON 0.499606245433 / ACTION 0 / REWARD 0.1 / Q_MAX 8.744648e+00
----------Random Action----------
TIMESTEP 12365 / STATE exp

TIMESTEP 12430 / STATE explore / EPSILON 0.499595247633 / ACTION 0 / REWARD 0.1 / Q_MAX 8.313896e+00
----------Random Action----------
TIMESTEP 12431 / STATE explore / EPSILON 0.499595081 / ACTION 0 / REWARD 0.1 / Q_MAX 8.381232e+00
TIMESTEP 12432 / STATE explore / EPSILON 0.499594914367 / ACTION 0 / REWARD 0.1 / Q_MAX 8.425660e+00
TIMESTEP 12433 / STATE explore / EPSILON 0.499594747733 / ACTION 0 / REWARD 0.1 / Q_MAX 8.387722e+00
----------Random Action----------
TIMESTEP 12434 / STATE explore / EPSILON 0.4995945811 / ACTION 0 / REWARD 0.1 / Q_MAX 8.301511e+00
----------Random Action----------
TIMESTEP 12435 / STATE explore / EPSILON 0.499594414467 / ACTION 0 / REWARD 0.1 / Q_MAX 8.319115e+00
----------Random Action----------
TIMESTEP 12436 / STATE explore / EPSILON 0.499594247833 / ACTION 0 / REWARD 0.1 / Q_MAX 8.349537e+00
TIMESTEP 12437 / STATE explore / EPSILON 0.4995940812 / ACTION 0 / REWARD 0.1 / Q_MAX 8.373896e+00
----------Random Action----------
TIMESTEP 12438 / STATE explor

TIMESTEP 12504 / STATE explore / EPSILON 0.499582916767 / ACTION 0 / REWARD 0.1 / Q_MAX 8.712107e+00
----------Random Action----------
TIMESTEP 12505 / STATE explore / EPSILON 0.499582750133 / ACTION 0 / REWARD 0.1 / Q_MAX 8.631028e+00
TIMESTEP 12506 / STATE explore / EPSILON 0.4995825835 / ACTION 0 / REWARD 0.1 / Q_MAX 8.627235e+00
----------Random Action----------
TIMESTEP 12507 / STATE explore / EPSILON 0.499582416867 / ACTION 0 / REWARD 0.1 / Q_MAX 8.612678e+00
----------Random Action----------
TIMESTEP 12508 / STATE explore / EPSILON 0.499582250233 / ACTION 0 / REWARD 0.1 / Q_MAX 8.674616e+00
----------Random Action----------
TIMESTEP 12509 / STATE explore / EPSILON 0.4995820836 / ACTION 0 / REWARD 0.1 / Q_MAX 8.654848e+00
----------Random Action----------
TIMESTEP 12510 / STATE explore / EPSILON 0.499581916967 / ACTION 0 / REWARD 0.1 / Q_MAX 8.697080e+00
TIMESTEP 12511 / STATE explore / EPSILON 0.499581750333 / ACTION 0 / REWARD 0.1 / Q_MAX 8.712246e+00
TIMESTEP 12512 / STATE exp

TIMESTEP 12577 / STATE explore / EPSILON 0.499570752533 / ACTION 0 / REWARD 0.1 / Q_MAX 8.233590e+00
TIMESTEP 12578 / STATE explore / EPSILON 0.4995705859 / ACTION 0 / REWARD 0.1 / Q_MAX 8.108367e+00
TIMESTEP 12579 / STATE explore / EPSILON 0.499570419267 / ACTION 0 / REWARD 0.1 / Q_MAX 8.007236e+00
----------Random Action----------
TIMESTEP 12580 / STATE explore / EPSILON 0.499570252633 / ACTION 0 / REWARD 0.1 / Q_MAX 8.000230e+00
----------Random Action----------
TIMESTEP 12581 / STATE explore / EPSILON 0.499570086 / ACTION 1 / REWARD 0.1 / Q_MAX 7.957019e+00
TIMESTEP 12582 / STATE explore / EPSILON 0.499569919367 / ACTION 0 / REWARD 0.1 / Q_MAX 7.558106e+00
TIMESTEP 12583 / STATE explore / EPSILON 0.499569752733 / ACTION 0 / REWARD 0.1 / Q_MAX 7.137244e+00
----------Random Action----------
TIMESTEP 12584 / STATE explore / EPSILON 0.4995695861 / ACTION 0 / REWARD 0.1 / Q_MAX 7.258513e+00
TIMESTEP 12585 / STATE explore / EPSILON 0.499569419467 / ACTION 0 / REWARD 0.1 / Q_MAX 7.130388e

TIMESTEP 12650 / STATE explore / EPSILON 0.4995585883 / ACTION 0 / REWARD 0.1 / Q_MAX 8.338357e+00
----------Random Action----------
TIMESTEP 12651 / STATE explore / EPSILON 0.499558421667 / ACTION 0 / REWARD 0.1 / Q_MAX 8.360354e+00
----------Random Action----------
TIMESTEP 12652 / STATE explore / EPSILON 0.499558255033 / ACTION 0 / REWARD 0.1 / Q_MAX 8.378780e+00
----------Random Action----------
TIMESTEP 12653 / STATE explore / EPSILON 0.4995580884 / ACTION 0 / REWARD 0.1 / Q_MAX 8.145172e+00
TIMESTEP 12654 / STATE explore / EPSILON 0.499557921767 / ACTION 0 / REWARD 0.1 / Q_MAX 7.995371e+00
----------Random Action----------
TIMESTEP 12655 / STATE explore / EPSILON 0.499557755133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.992302e+00
----------Random Action----------
TIMESTEP 12656 / STATE explore / EPSILON 0.4995575885 / ACTION 0 / REWARD 0.1 / Q_MAX 7.959537e+00
TIMESTEP 12657 / STATE explore / EPSILON 0.499557421867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.863590e+00
TIMESTEP 12658 / STATE explo

----------Random Action----------
TIMESTEP 12724 / STATE explore / EPSILON 0.499546257433 / ACTION 0 / REWARD 0.1 / Q_MAX 8.096927e+00
TIMESTEP 12725 / STATE explore / EPSILON 0.4995460908 / ACTION 1 / REWARD 0.1 / Q_MAX 7.953979e+00
----------Random Action----------
TIMESTEP 12726 / STATE explore / EPSILON 0.499545924167 / ACTION 0 / REWARD 0.1 / Q_MAX 8.118390e+00
----------Random Action----------
TIMESTEP 12727 / STATE explore / EPSILON 0.499545757533 / ACTION 1 / REWARD 0.1 / Q_MAX 8.244433e+00
TIMESTEP 12728 / STATE explore / EPSILON 0.4995455909 / ACTION 1 / REWARD 0.1 / Q_MAX 8.198865e+00
TIMESTEP 12729 / STATE explore / EPSILON 0.499545424267 / ACTION 1 / REWARD 0.1 / Q_MAX 8.274425e+00
----------Random Action----------
TIMESTEP 12730 / STATE explore / EPSILON 0.499545257633 / ACTION 0 / REWARD 0.1 / Q_MAX 8.327547e+00
----------Random Action----------
TIMESTEP 12731 / STATE explore / EPSILON 0.499545091 / ACTION 0 / REWARD 0.1 / Q_MAX 8.382608e+00
----------Random Action------

TIMESTEP 12799 / STATE explore / EPSILON 0.499533759933 / ACTION 0 / REWARD 0.1 / Q_MAX 6.574514e+00
TIMESTEP 12800 / STATE explore / EPSILON 0.4995335933 / ACTION 0 / REWARD 0.1 / Q_MAX 6.541318e+00
TIMESTEP 12801 / STATE explore / EPSILON 0.499533426667 / ACTION 0 / REWARD 0.1 / Q_MAX 6.686034e+00
----------Random Action----------
TIMESTEP 12802 / STATE explore / EPSILON 0.499533260033 / ACTION 0 / REWARD 0.1 / Q_MAX 6.703907e+00
----------Random Action----------
TIMESTEP 12803 / STATE explore / EPSILON 0.4995330934 / ACTION 0 / REWARD 0.1 / Q_MAX 6.767634e+00
----------Random Action----------
TIMESTEP 12804 / STATE explore / EPSILON 0.499532926767 / ACTION 0 / REWARD 0.1 / Q_MAX 7.073629e+00
TIMESTEP 12805 / STATE explore / EPSILON 0.499532760133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.165291e+00
TIMESTEP 12806 / STATE explore / EPSILON 0.4995325935 / ACTION 0 / REWARD 0.1 / Q_MAX 7.087637e+00
----------Random Action----------
TIMESTEP 12807 / STATE explore / EPSILON 0.499532426867 / ACTI

----------Random Action----------
TIMESTEP 12873 / STATE explore / EPSILON 0.499521429067 / ACTION 0 / REWARD 0.1 / Q_MAX 6.834807e+00
TIMESTEP 12874 / STATE explore / EPSILON 0.499521262433 / ACTION 0 / REWARD 0.1 / Q_MAX 6.767045e+00
TIMESTEP 12875 / STATE explore / EPSILON 0.4995210958 / ACTION 0 / REWARD 0.1 / Q_MAX 7.232478e+00
----------Random Action----------
TIMESTEP 12876 / STATE explore / EPSILON 0.499520929167 / ACTION 1 / REWARD 0.1 / Q_MAX 7.175650e+00
----------Random Action----------
TIMESTEP 12877 / STATE explore / EPSILON 0.499520762533 / ACTION 0 / REWARD 0.1 / Q_MAX 5.924383e+00
TIMESTEP 12878 / STATE explore / EPSILON 0.4995205959 / ACTION 0 / REWARD 0.1 / Q_MAX 4.573197e+00
TIMESTEP 12879 / STATE explore / EPSILON 0.499520429267 / ACTION 0 / REWARD 0.1 / Q_MAX 4.287041e+00
----------Random Action----------
TIMESTEP 12880 / STATE explore / EPSILON 0.499520262633 / ACTION 0 / REWARD -1 / Q_MAX -1.599877e+00
TIMESTEP 12881 / STATE explore / EPSILON 0.499520096 / ACTIO

TIMESTEP 12949 / STATE explore / EPSILON 0.499508764933 / ACTION 1 / REWARD 0.1 / Q_MAX 7.007940e+00
TIMESTEP 12950 / STATE explore / EPSILON 0.4995085983 / ACTION 1 / REWARD 0.1 / Q_MAX 7.007864e+00
TIMESTEP 12951 / STATE explore / EPSILON 0.499508431667 / ACTION 1 / REWARD 0.1 / Q_MAX 7.048129e+00
----------Random Action----------
TIMESTEP 12952 / STATE explore / EPSILON 0.499508265033 / ACTION 0 / REWARD 0.1 / Q_MAX 6.962546e+00
TIMESTEP 12953 / STATE explore / EPSILON 0.4995080984 / ACTION 0 / REWARD 0.1 / Q_MAX 6.690657e+00
----------Random Action----------
TIMESTEP 12954 / STATE explore / EPSILON 0.499507931767 / ACTION 0 / REWARD 0.1 / Q_MAX 6.565053e+00
----------Random Action----------
TIMESTEP 12955 / STATE explore / EPSILON 0.499507765133 / ACTION 0 / REWARD 0.1 / Q_MAX 6.938869e+00
TIMESTEP 12956 / STATE explore / EPSILON 0.4995075985 / ACTION 0 / REWARD 0.1 / Q_MAX 6.989237e+00
TIMESTEP 12957 / STATE explore / EPSILON 0.499507431867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.123274

----------Random Action----------
TIMESTEP 13025 / STATE explore / EPSILON 0.4994961008 / ACTION 0 / REWARD 0.1 / Q_MAX 7.018001e+00
----------Random Action----------
TIMESTEP 13026 / STATE explore / EPSILON 0.499495934167 / ACTION 0 / REWARD 0.1 / Q_MAX 7.195670e+00
TIMESTEP 13027 / STATE explore / EPSILON 0.499495767533 / ACTION 0 / REWARD 1 / Q_MAX 7.326911e+00
----------Random Action----------
TIMESTEP 13028 / STATE explore / EPSILON 0.4994956009 / ACTION 1 / REWARD 0.1 / Q_MAX 6.519952e+00
----------Random Action----------
TIMESTEP 13029 / STATE explore / EPSILON 0.499495434267 / ACTION 0 / REWARD 0.1 / Q_MAX 4.524681e+00
TIMESTEP 13030 / STATE explore / EPSILON 0.499495267633 / ACTION 0 / REWARD 0.1 / Q_MAX 3.732284e+00
TIMESTEP 13031 / STATE explore / EPSILON 0.499495101 / ACTION 0 / REWARD -1 / Q_MAX 2.102074e+00
TIMESTEP 13032 / STATE explore / EPSILON 0.499494934367 / ACTION 0 / REWARD 0.1 / Q_MAX 8.145299e+00
TIMESTEP 13033 / STATE explore / EPSILON 0.499494767733 / ACTION 0

TIMESTEP 13098 / STATE explore / EPSILON 0.499483936567 / ACTION 0 / REWARD 0.1 / Q_MAX 6.893154e+00
----------Random Action----------
TIMESTEP 13099 / STATE explore / EPSILON 0.499483769933 / ACTION 0 / REWARD 0.1 / Q_MAX 7.076243e+00
----------Random Action----------
TIMESTEP 13100 / STATE explore / EPSILON 0.4994836033 / ACTION 0 / REWARD 0.1 / Q_MAX 7.151840e+00
----------Random Action----------
TIMESTEP 13101 / STATE explore / EPSILON 0.499483436667 / ACTION 0 / REWARD 0.1 / Q_MAX 7.177778e+00
----------Random Action----------
TIMESTEP 13102 / STATE explore / EPSILON 0.499483270033 / ACTION 0 / REWARD 0.1 / Q_MAX 7.250573e+00
TIMESTEP 13103 / STATE explore / EPSILON 0.4994831034 / ACTION 0 / REWARD 0.1 / Q_MAX 7.303692e+00
TIMESTEP 13104 / STATE explore / EPSILON 0.499482936767 / ACTION 0 / REWARD 0.1 / Q_MAX 7.182135e+00
TIMESTEP 13105 / STATE explore / EPSILON 0.499482770133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.171405e+00
TIMESTEP 13106 / STATE explore / EPSILON 0.4994826035 / ACTI

----------Random Action----------
TIMESTEP 13173 / STATE explore / EPSILON 0.499471439067 / ACTION 0 / REWARD 0.1 / Q_MAX 7.007874e+00
TIMESTEP 13174 / STATE explore / EPSILON 0.499471272433 / ACTION 0 / REWARD 0.1 / Q_MAX 6.952697e+00
TIMESTEP 13175 / STATE explore / EPSILON 0.4994711058 / ACTION 0 / REWARD 0.1 / Q_MAX 7.051875e+00
TIMESTEP 13176 / STATE explore / EPSILON 0.499470939167 / ACTION 0 / REWARD 0.1 / Q_MAX 6.961770e+00
TIMESTEP 13177 / STATE explore / EPSILON 0.499470772533 / ACTION 0 / REWARD 0.1 / Q_MAX 7.031514e+00
TIMESTEP 13178 / STATE explore / EPSILON 0.4994706059 / ACTION 0 / REWARD 1 / Q_MAX 7.239803e+00
----------Random Action----------
TIMESTEP 13179 / STATE explore / EPSILON 0.499470439267 / ACTION 0 / REWARD 0.1 / Q_MAX 6.687264e+00
----------Random Action----------
TIMESTEP 13180 / STATE explore / EPSILON 0.499470272633 / ACTION 0 / REWARD 0.1 / Q_MAX 6.517971e+00
----------Random Action----------
TIMESTEP 13181 / STATE explore / EPSILON 0.499470106 / ACTION 

TIMESTEP 13245 / STATE explore / EPSILON 0.499459441467 / ACTION 0 / REWARD 0.1 / Q_MAX 7.427575e+00
TIMESTEP 13246 / STATE explore / EPSILON 0.499459274833 / ACTION 0 / REWARD 0.1 / Q_MAX 7.382982e+00
TIMESTEP 13247 / STATE explore / EPSILON 0.4994591082 / ACTION 0 / REWARD 0.1 / Q_MAX 7.427059e+00
----------Random Action----------
TIMESTEP 13248 / STATE explore / EPSILON 0.499458941567 / ACTION 0 / REWARD 0.1 / Q_MAX 7.612102e+00
TIMESTEP 13249 / STATE explore / EPSILON 0.499458774933 / ACTION 0 / REWARD 0.1 / Q_MAX 7.724119e+00
TIMESTEP 13250 / STATE explore / EPSILON 0.4994586083 / ACTION 0 / REWARD 0.1 / Q_MAX 7.756005e+00
TIMESTEP 13251 / STATE explore / EPSILON 0.499458441667 / ACTION 0 / REWARD 0.1 / Q_MAX 7.785498e+00
----------Random Action----------
TIMESTEP 13252 / STATE explore / EPSILON 0.499458275033 / ACTION 0 / REWARD 0.1 / Q_MAX 7.910994e+00
TIMESTEP 13253 / STATE explore / EPSILON 0.4994581084 / ACTION 0 / REWARD 0.1 / Q_MAX 8.047534e+00
----------Random Action------

TIMESTEP 13317 / STATE explore / EPSILON 0.499447443867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.974262e+00
TIMESTEP 13318 / STATE explore / EPSILON 0.499447277233 / ACTION 0 / REWARD 0.1 / Q_MAX 8.170321e+00
----------Random Action----------
TIMESTEP 13319 / STATE explore / EPSILON 0.4994471106 / ACTION 0 / REWARD 0.1 / Q_MAX 8.218432e+00
----------Random Action----------
TIMESTEP 13320 / STATE explore / EPSILON 0.499446943967 / ACTION 0 / REWARD 0.1 / Q_MAX 8.203568e+00
----------Random Action----------
TIMESTEP 13321 / STATE explore / EPSILON 0.499446777333 / ACTION 0 / REWARD 0.1 / Q_MAX 8.159308e+00
----------Random Action----------
TIMESTEP 13322 / STATE explore / EPSILON 0.4994466107 / ACTION 0 / REWARD 0.1 / Q_MAX 8.157807e+00
----------Random Action----------
TIMESTEP 13323 / STATE explore / EPSILON 0.499446444067 / ACTION 1 / REWARD 0.1 / Q_MAX 8.363630e+00
TIMESTEP 13324 / STATE explore / EPSILON 0.499446277433 / ACTION 0 / REWARD 0.1 / Q_MAX 8.115857e+00
TIMESTEP 13325 / STATE exp

TIMESTEP 13391 / STATE explore / EPSILON 0.499435113 / ACTION 0 / REWARD 0.1 / Q_MAX 7.575354e+00
TIMESTEP 13392 / STATE explore / EPSILON 0.499434946367 / ACTION 0 / REWARD 0.1 / Q_MAX 7.657453e+00
----------Random Action----------
TIMESTEP 13393 / STATE explore / EPSILON 0.499434779733 / ACTION 0 / REWARD 0.1 / Q_MAX 7.757400e+00
----------Random Action----------
TIMESTEP 13394 / STATE explore / EPSILON 0.4994346131 / ACTION 0 / REWARD 0.1 / Q_MAX 7.725925e+00
----------Random Action----------
TIMESTEP 13395 / STATE explore / EPSILON 0.499434446467 / ACTION 0 / REWARD 0.1 / Q_MAX 7.856310e+00
TIMESTEP 13396 / STATE explore / EPSILON 0.499434279833 / ACTION 0 / REWARD 0.1 / Q_MAX 7.848836e+00
----------Random Action----------
TIMESTEP 13397 / STATE explore / EPSILON 0.4994341132 / ACTION 0 / REWARD 0.1 / Q_MAX 7.850580e+00
TIMESTEP 13398 / STATE explore / EPSILON 0.499433946567 / ACTION 1 / REWARD 0.1 / Q_MAX 7.660813e+00
TIMESTEP 13399 / STATE explore / EPSILON 0.499433779933 / ACTIO

TIMESTEP 13463 / STATE explore / EPSILON 0.4994231154 / ACTION 0 / REWARD 0.1 / Q_MAX 6.068582e+00
TIMESTEP 13464 / STATE explore / EPSILON 0.499422948767 / ACTION 0 / REWARD 0.1 / Q_MAX 6.169868e+00
TIMESTEP 13465 / STATE explore / EPSILON 0.499422782133 / ACTION 0 / REWARD 0.1 / Q_MAX 5.740998e+00
TIMESTEP 13466 / STATE explore / EPSILON 0.4994226155 / ACTION 0 / REWARD 0.1 / Q_MAX 5.885658e+00
----------Random Action----------
TIMESTEP 13467 / STATE explore / EPSILON 0.499422448867 / ACTION 1 / REWARD 0.1 / Q_MAX 6.145771e+00
----------Random Action----------
TIMESTEP 13468 / STATE explore / EPSILON 0.499422282233 / ACTION 0 / REWARD 0.1 / Q_MAX 4.257745e+00
TIMESTEP 13469 / STATE explore / EPSILON 0.4994221156 / ACTION 1 / REWARD -1 / Q_MAX -1.838747e-01
TIMESTEP 13470 / STATE explore / EPSILON 0.499421948967 / ACTION 0 / REWARD 0.1 / Q_MAX 7.766325e+00
TIMESTEP 13471 / STATE explore / EPSILON 0.499421782333 / ACTION 0 / REWARD 0.1 / Q_MAX 7.926435e+00
TIMESTEP 13472 / STATE explor

TIMESTEP 13540 / STATE explore / EPSILON 0.499410284633 / ACTION 0 / REWARD 0.1 / Q_MAX 7.867039e+00
TIMESTEP 13541 / STATE explore / EPSILON 0.499410118 / ACTION 0 / REWARD 0.1 / Q_MAX 7.717078e+00
----------Random Action----------
TIMESTEP 13542 / STATE explore / EPSILON 0.499409951367 / ACTION 0 / REWARD 0.1 / Q_MAX 7.784484e+00
----------Random Action----------
TIMESTEP 13543 / STATE explore / EPSILON 0.499409784733 / ACTION 0 / REWARD 0.1 / Q_MAX 7.723294e+00
TIMESTEP 13544 / STATE explore / EPSILON 0.4994096181 / ACTION 0 / REWARD 0.1 / Q_MAX 7.779776e+00
----------Random Action----------
TIMESTEP 13545 / STATE explore / EPSILON 0.499409451467 / ACTION 0 / REWARD 0.1 / Q_MAX 7.751844e+00
TIMESTEP 13546 / STATE explore / EPSILON 0.499409284833 / ACTION 0 / REWARD 0.1 / Q_MAX 7.575186e+00
TIMESTEP 13547 / STATE explore / EPSILON 0.4994091182 / ACTION 0 / REWARD 0.1 / Q_MAX 7.415269e+00
----------Random Action----------
TIMESTEP 13548 / STATE explore / EPSILON 0.499408951567 / ACTIO

TIMESTEP 13614 / STATE explore / EPSILON 0.499397953767 / ACTION 0 / REWARD 0.1 / Q_MAX 7.711460e+00
----------Random Action----------
TIMESTEP 13615 / STATE explore / EPSILON 0.499397787133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.674773e+00
----------Random Action----------
TIMESTEP 13616 / STATE explore / EPSILON 0.4993976205 / ACTION 0 / REWARD 0.1 / Q_MAX 7.638391e+00
----------Random Action----------
TIMESTEP 13617 / STATE explore / EPSILON 0.499397453867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.643134e+00
----------Random Action----------
TIMESTEP 13618 / STATE explore / EPSILON 0.499397287233 / ACTION 0 / REWARD 0.1 / Q_MAX 7.664923e+00
----------Random Action----------
TIMESTEP 13619 / STATE explore / EPSILON 0.4993971206 / ACTION 0 / REWARD 0.1 / Q_MAX 7.629056e+00
----------Random Action----------
TIMESTEP 13620 / STATE explore / EPSILON 0.499396953967 / ACTION 0 / REWARD 0.1 / Q_MAX 7.639137e+00
----------Random Action----------
TIMESTEP 13621 / STATE explore / EPSILON 0.499396787333 / A

----------Random Action----------
TIMESTEP 13689 / STATE explore / EPSILON 0.499385456267 / ACTION 0 / REWARD 0.1 / Q_MAX 7.406075e+00
TIMESTEP 13690 / STATE explore / EPSILON 0.499385289633 / ACTION 0 / REWARD 0.1 / Q_MAX 7.374023e+00
TIMESTEP 13691 / STATE explore / EPSILON 0.499385123 / ACTION 0 / REWARD 0.1 / Q_MAX 7.323546e+00
----------Random Action----------
TIMESTEP 13692 / STATE explore / EPSILON 0.499384956367 / ACTION 0 / REWARD 0.1 / Q_MAX 7.287622e+00
TIMESTEP 13693 / STATE explore / EPSILON 0.499384789733 / ACTION 0 / REWARD 0.1 / Q_MAX 7.199621e+00
----------Random Action----------
TIMESTEP 13694 / STATE explore / EPSILON 0.4993846231 / ACTION 0 / REWARD 0.1 / Q_MAX 7.170848e+00
----------Random Action----------
TIMESTEP 13695 / STATE explore / EPSILON 0.499384456467 / ACTION 1 / REWARD 0.1 / Q_MAX 7.065551e+00
TIMESTEP 13696 / STATE explore / EPSILON 0.499384289833 / ACTION 0 / REWARD 0.1 / Q_MAX 6.842699e+00
TIMESTEP 13697 / STATE explore / EPSILON 0.4993841232 / ACTIO

TIMESTEP 13764 / STATE explore / EPSILON 0.499372958767 / ACTION 0 / REWARD 0.1 / Q_MAX 5.481943e+00
TIMESTEP 13765 / STATE explore / EPSILON 0.499372792133 / ACTION 0 / REWARD 0.1 / Q_MAX 5.719597e+00
----------Random Action----------
TIMESTEP 13766 / STATE explore / EPSILON 0.4993726255 / ACTION 0 / REWARD 0.1 / Q_MAX 6.008091e+00
TIMESTEP 13767 / STATE explore / EPSILON 0.499372458867 / ACTION 0 / REWARD 1 / Q_MAX 6.039173e+00
TIMESTEP 13768 / STATE explore / EPSILON 0.499372292233 / ACTION 0 / REWARD 0.1 / Q_MAX 5.639202e+00
TIMESTEP 13769 / STATE explore / EPSILON 0.4993721256 / ACTION 0 / REWARD 0.1 / Q_MAX 5.761638e+00
TIMESTEP 13770 / STATE explore / EPSILON 0.499371958967 / ACTION 0 / REWARD 0.1 / Q_MAX 5.930671e+00
----------Random Action----------
TIMESTEP 13771 / STATE explore / EPSILON 0.499371792333 / ACTION 0 / REWARD 0.1 / Q_MAX 6.210415e+00
----------Random Action----------
TIMESTEP 13772 / STATE explore / EPSILON 0.4993716257 / ACTION 0 / REWARD 0.1 / Q_MAX 6.073946e+

TIMESTEP 13838 / STATE explore / EPSILON 0.4993606279 / ACTION 0 / REWARD 0.1 / Q_MAX 7.493096e+00
TIMESTEP 13839 / STATE explore / EPSILON 0.499360461267 / ACTION 1 / REWARD 0.1 / Q_MAX 6.535974e+00
----------Random Action----------
TIMESTEP 13840 / STATE explore / EPSILON 0.499360294633 / ACTION 0 / REWARD 0.1 / Q_MAX 6.610197e+00
TIMESTEP 13841 / STATE explore / EPSILON 0.499360128 / ACTION 1 / REWARD 0.1 / Q_MAX 6.905032e+00
----------Random Action----------
TIMESTEP 13842 / STATE explore / EPSILON 0.499359961367 / ACTION 0 / REWARD 0.1 / Q_MAX 6.853287e+00
TIMESTEP 13843 / STATE explore / EPSILON 0.499359794733 / ACTION 1 / REWARD 0.1 / Q_MAX 6.695370e+00
TIMESTEP 13844 / STATE explore / EPSILON 0.4993596281 / ACTION 1 / REWARD 0.1 / Q_MAX 6.461135e+00
TIMESTEP 13845 / STATE explore / EPSILON 0.499359461467 / ACTION 0 / REWARD 0.1 / Q_MAX 6.493747e+00
TIMESTEP 13846 / STATE explore / EPSILON 0.499359294833 / ACTION 0 / REWARD 0.1 / Q_MAX 6.445090e+00
TIMESTEP 13847 / STATE explore

TIMESTEP 13914 / STATE explore / EPSILON 0.499347963767 / ACTION 0 / REWARD 0.1 / Q_MAX 7.885665e+00
----------Random Action----------
TIMESTEP 13915 / STATE explore / EPSILON 0.499347797133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.867034e+00
TIMESTEP 13916 / STATE explore / EPSILON 0.4993476305 / ACTION 0 / REWARD 0.1 / Q_MAX 7.872503e+00
TIMESTEP 13917 / STATE explore / EPSILON 0.499347463867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.903638e+00
----------Random Action----------
TIMESTEP 13918 / STATE explore / EPSILON 0.499347297233 / ACTION 0 / REWARD 0.1 / Q_MAX 7.899918e+00
----------Random Action----------
TIMESTEP 13919 / STATE explore / EPSILON 0.4993471306 / ACTION 0 / REWARD 0.1 / Q_MAX 7.848514e+00
TIMESTEP 13920 / STATE explore / EPSILON 0.499346963967 / ACTION 0 / REWARD 0.1 / Q_MAX 7.767429e+00
TIMESTEP 13921 / STATE explore / EPSILON 0.499346797333 / ACTION 0 / REWARD 0.1 / Q_MAX 7.743916e+00
TIMESTEP 13922 / STATE explore / EPSILON 0.4993466307 / ACTION 0 / REWARD 0.1 / Q_MAX 7.737583

TIMESTEP 13988 / STATE explore / EPSILON 0.4993356329 / ACTION 0 / REWARD 0.1 / Q_MAX 7.445995e+00
----------Random Action----------
TIMESTEP 13989 / STATE explore / EPSILON 0.499335466267 / ACTION 0 / REWARD 0.1 / Q_MAX 7.529026e+00
----------Random Action----------
TIMESTEP 13990 / STATE explore / EPSILON 0.499335299633 / ACTION 0 / REWARD 0.1 / Q_MAX 7.730381e+00
TIMESTEP 13991 / STATE explore / EPSILON 0.499335133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.761004e+00
TIMESTEP 13992 / STATE explore / EPSILON 0.499334966367 / ACTION 0 / REWARD 0.1 / Q_MAX 7.709675e+00
TIMESTEP 13993 / STATE explore / EPSILON 0.499334799733 / ACTION 0 / REWARD 0.1 / Q_MAX 7.659943e+00
TIMESTEP 13994 / STATE explore / EPSILON 0.4993346331 / ACTION 0 / REWARD 0.1 / Q_MAX 7.821152e+00
TIMESTEP 13995 / STATE explore / EPSILON 0.499334466467 / ACTION 0 / REWARD 0.1 / Q_MAX 7.873078e+00
----------Random Action----------
TIMESTEP 13996 / STATE explore / EPSILON 0.499334299833 / ACTION 0 / REWARD 0.1 / Q_MAX 7.862128e

TIMESTEP 14062 / STATE explore / EPSILON 0.499323302033 / ACTION 0 / REWARD 0.1 / Q_MAX 7.880457e+00
----------Random Action----------
TIMESTEP 14063 / STATE explore / EPSILON 0.4993231354 / ACTION 0 / REWARD 0.1 / Q_MAX 8.123949e+00
----------Random Action----------
TIMESTEP 14064 / STATE explore / EPSILON 0.499322968767 / ACTION 0 / REWARD 0.1 / Q_MAX 8.039748e+00
----------Random Action----------
TIMESTEP 14065 / STATE explore / EPSILON 0.499322802133 / ACTION 1 / REWARD 0.1 / Q_MAX 8.071884e+00
TIMESTEP 14066 / STATE explore / EPSILON 0.4993226355 / ACTION 0 / REWARD 0.1 / Q_MAX 7.677231e+00
----------Random Action----------
TIMESTEP 14067 / STATE explore / EPSILON 0.499322468867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.477756e+00
TIMESTEP 14068 / STATE explore / EPSILON 0.499322302233 / ACTION 0 / REWARD 0.1 / Q_MAX 7.441954e+00
----------Random Action----------
TIMESTEP 14069 / STATE explore / EPSILON 0.4993221356 / ACTION 0 / REWARD 0.1 / Q_MAX 7.303771e+00
----------Random Action-----

----------Random Action----------
TIMESTEP 14131 / STATE explore / EPSILON 0.499311804333 / ACTION 1 / REWARD 0.1 / Q_MAX 8.262578e+00
----------Random Action----------
TIMESTEP 14132 / STATE explore / EPSILON 0.4993116377 / ACTION 0 / REWARD 0.1 / Q_MAX 8.371120e+00
----------Random Action----------
TIMESTEP 14133 / STATE explore / EPSILON 0.499311471067 / ACTION 0 / REWARD 0.1 / Q_MAX 8.218387e+00
TIMESTEP 14134 / STATE explore / EPSILON 0.499311304433 / ACTION 0 / REWARD 0.1 / Q_MAX 8.176597e+00
TIMESTEP 14135 / STATE explore / EPSILON 0.4993111378 / ACTION 0 / REWARD 0.1 / Q_MAX 8.124268e+00
TIMESTEP 14136 / STATE explore / EPSILON 0.499310971167 / ACTION 0 / REWARD 0.1 / Q_MAX 8.137125e+00
TIMESTEP 14137 / STATE explore / EPSILON 0.499310804533 / ACTION 0 / REWARD 0.1 / Q_MAX 8.060308e+00
TIMESTEP 14138 / STATE explore / EPSILON 0.4993106379 / ACTION 0 / REWARD 0.1 / Q_MAX 8.074639e+00
----------Random Action----------
TIMESTEP 14139 / STATE explore / EPSILON 0.499310471267 / ACTI

----------Random Action----------
TIMESTEP 14205 / STATE explore / EPSILON 0.499299473467 / ACTION 0 / REWARD 0.1 / Q_MAX 6.257325e+00
----------Random Action----------
TIMESTEP 14206 / STATE explore / EPSILON 0.499299306833 / ACTION 0 / REWARD 0.1 / Q_MAX 6.144595e+00
----------Random Action----------
TIMESTEP 14207 / STATE explore / EPSILON 0.4992991402 / ACTION 0 / REWARD 0.1 / Q_MAX 6.003133e+00
TIMESTEP 14208 / STATE explore / EPSILON 0.499298973567 / ACTION 0 / REWARD 0.1 / Q_MAX 5.726785e+00
----------Random Action----------
TIMESTEP 14209 / STATE explore / EPSILON 0.499298806933 / ACTION 0 / REWARD 0.1 / Q_MAX 5.650292e+00
----------Random Action----------
TIMESTEP 14210 / STATE explore / EPSILON 0.4992986403 / ACTION 0 / REWARD 0.1 / Q_MAX 5.453745e+00
TIMESTEP 14211 / STATE explore / EPSILON 0.499298473667 / ACTION 1 / REWARD 0.1 / Q_MAX 5.351553e+00
----------Random Action----------
TIMESTEP 14212 / STATE explore / EPSILON 0.499298307033 / ACTION 0 / REWARD 0.1 / Q_MAX 5.419

----------Random Action----------
TIMESTEP 14279 / STATE explore / EPSILON 0.4992871426 / ACTION 0 / REWARD 0.1 / Q_MAX 7.675150e+00
TIMESTEP 14280 / STATE explore / EPSILON 0.499286975967 / ACTION 0 / REWARD 0.1 / Q_MAX 7.434850e+00
TIMESTEP 14281 / STATE explore / EPSILON 0.499286809333 / ACTION 0 / REWARD 0.1 / Q_MAX 7.329259e+00
----------Random Action----------
TIMESTEP 14282 / STATE explore / EPSILON 0.4992866427 / ACTION 0 / REWARD 0.1 / Q_MAX 7.239539e+00
----------Random Action----------
TIMESTEP 14283 / STATE explore / EPSILON 0.499286476067 / ACTION 0 / REWARD 0.1 / Q_MAX 7.199081e+00
TIMESTEP 14284 / STATE explore / EPSILON 0.499286309433 / ACTION 1 / REWARD 0.1 / Q_MAX 7.121765e+00
----------Random Action----------
TIMESTEP 14285 / STATE explore / EPSILON 0.4992861428 / ACTION 0 / REWARD 0.1 / Q_MAX 6.771103e+00
TIMESTEP 14286 / STATE explore / EPSILON 0.499285976167 / ACTION 0 / REWARD 0.1 / Q_MAX 6.765757e+00
TIMESTEP 14287 / STATE explore / EPSILON 0.499285809533 / ACTI

TIMESTEP 14352 / STATE explore / EPSILON 0.499274978367 / ACTION 0 / REWARD 0.1 / Q_MAX 6.688056e+00
----------Random Action----------
TIMESTEP 14353 / STATE explore / EPSILON 0.499274811733 / ACTION 0 / REWARD 0.1 / Q_MAX 6.770108e+00
TIMESTEP 14354 / STATE explore / EPSILON 0.4992746451 / ACTION 0 / REWARD 0.1 / Q_MAX 6.662035e+00
TIMESTEP 14355 / STATE explore / EPSILON 0.499274478467 / ACTION 0 / REWARD 0.1 / Q_MAX 6.626478e+00
TIMESTEP 14356 / STATE explore / EPSILON 0.499274311833 / ACTION 0 / REWARD 0.1 / Q_MAX 6.595234e+00
----------Random Action----------
TIMESTEP 14357 / STATE explore / EPSILON 0.4992741452 / ACTION 0 / REWARD 0.1 / Q_MAX 6.461207e+00
----------Random Action----------
TIMESTEP 14358 / STATE explore / EPSILON 0.499273978567 / ACTION 0 / REWARD 0.1 / Q_MAX 6.316076e+00
----------Random Action----------
TIMESTEP 14359 / STATE explore / EPSILON 0.499273811933 / ACTION 1 / REWARD 0.1 / Q_MAX 6.042459e+00
----------Random Action----------
TIMESTEP 14360 / STATE exp

----------Random Action----------
TIMESTEP 14426 / STATE explore / EPSILON 0.4992626475 / ACTION 0 / REWARD 0.1 / Q_MAX 6.216854e+00
----------Random Action----------
TIMESTEP 14427 / STATE explore / EPSILON 0.499262480867 / ACTION 0 / REWARD 1 / Q_MAX 6.350369e+00
TIMESTEP 14428 / STATE explore / EPSILON 0.499262314233 / ACTION 0 / REWARD 0.1 / Q_MAX 5.752169e+00
----------Random Action----------
TIMESTEP 14429 / STATE explore / EPSILON 0.4992621476 / ACTION 1 / REWARD 0.1 / Q_MAX 5.607965e+00
----------Random Action----------
TIMESTEP 14430 / STATE explore / EPSILON 0.499261980967 / ACTION 0 / REWARD 0.1 / Q_MAX 5.323937e+00
TIMESTEP 14431 / STATE explore / EPSILON 0.499261814333 / ACTION 1 / REWARD 0.1 / Q_MAX 5.390682e+00
----------Random Action----------
TIMESTEP 14432 / STATE explore / EPSILON 0.4992616477 / ACTION 0 / REWARD 0.1 / Q_MAX 4.870563e+00
TIMESTEP 14433 / STATE explore / EPSILON 0.499261481067 / ACTION 0 / REWARD 0.1 / Q_MAX 4.634860e+00
TIMESTEP 14434 / STATE explore

----------Random Action----------
TIMESTEP 14500 / STATE explore / EPSILON 0.499250316633 / ACTION 0 / REWARD 0.1 / Q_MAX 5.309639e+00
TIMESTEP 14501 / STATE explore / EPSILON 0.49925015 / ACTION 0 / REWARD 0.1 / Q_MAX 5.104095e+00
TIMESTEP 14502 / STATE explore / EPSILON 0.499249983367 / ACTION 1 / REWARD 0.1 / Q_MAX 4.690211e+00
----------Random Action----------
TIMESTEP 14503 / STATE explore / EPSILON 0.499249816733 / ACTION 0 / REWARD 0.1 / Q_MAX 5.209124e+00
TIMESTEP 14504 / STATE explore / EPSILON 0.4992496501 / ACTION 0 / REWARD 0.1 / Q_MAX 5.354054e+00
TIMESTEP 14505 / STATE explore / EPSILON 0.499249483467 / ACTION 0 / REWARD 0.1 / Q_MAX 5.411148e+00
TIMESTEP 14506 / STATE explore / EPSILON 0.499249316833 / ACTION 0 / REWARD 0.1 / Q_MAX 5.470565e+00
----------Random Action----------
TIMESTEP 14507 / STATE explore / EPSILON 0.4992491502 / ACTION 0 / REWARD 0.1 / Q_MAX 5.842278e+00
TIMESTEP 14508 / STATE explore / EPSILON 0.499248983567 / ACTION 0 / REWARD 0.1 / Q_MAX 6.257580e+

TIMESTEP 14573 / STATE explore / EPSILON 0.4992381524 / ACTION 0 / REWARD 0.1 / Q_MAX 6.432564e+00
----------Random Action----------
TIMESTEP 14574 / STATE explore / EPSILON 0.499237985767 / ACTION 0 / REWARD 0.1 / Q_MAX 6.416540e+00
----------Random Action----------
TIMESTEP 14575 / STATE explore / EPSILON 0.499237819133 / ACTION 0 / REWARD 0.1 / Q_MAX 6.370656e+00
TIMESTEP 14576 / STATE explore / EPSILON 0.4992376525 / ACTION 0 / REWARD 0.1 / Q_MAX 6.363801e+00
TIMESTEP 14577 / STATE explore / EPSILON 0.499237485867 / ACTION 0 / REWARD 0.1 / Q_MAX 6.169193e+00
TIMESTEP 14578 / STATE explore / EPSILON 0.499237319233 / ACTION 0 / REWARD 0.1 / Q_MAX 6.107744e+00
TIMESTEP 14579 / STATE explore / EPSILON 0.4992371526 / ACTION 0 / REWARD 0.1 / Q_MAX 5.969632e+00
TIMESTEP 14580 / STATE explore / EPSILON 0.499236985967 / ACTION 0 / REWARD 0.1 / Q_MAX 5.891111e+00
----------Random Action----------
TIMESTEP 14581 / STATE explore / EPSILON 0.499236819333 / ACTION 0 / REWARD 0.1 / Q_MAX 5.825120

TIMESTEP 14645 / STATE explore / EPSILON 0.4992261548 / ACTION 0 / REWARD 0.1 / Q_MAX 7.011019e+00
----------Random Action----------
TIMESTEP 14646 / STATE explore / EPSILON 0.499225988167 / ACTION 0 / REWARD 0.1 / Q_MAX 6.861448e+00
----------Random Action----------
TIMESTEP 14647 / STATE explore / EPSILON 0.499225821533 / ACTION 1 / REWARD 0.1 / Q_MAX 6.750212e+00
TIMESTEP 14648 / STATE explore / EPSILON 0.4992256549 / ACTION 1 / REWARD 0.1 / Q_MAX 6.612607e+00
TIMESTEP 14649 / STATE explore / EPSILON 0.499225488267 / ACTION 1 / REWARD 0.1 / Q_MAX 6.578967e+00
----------Random Action----------
TIMESTEP 14650 / STATE explore / EPSILON 0.499225321633 / ACTION 0 / REWARD 0.1 / Q_MAX 6.435225e+00
TIMESTEP 14651 / STATE explore / EPSILON 0.499225155 / ACTION 0 / REWARD 0.1 / Q_MAX 6.509973e+00
TIMESTEP 14652 / STATE explore / EPSILON 0.499224988367 / ACTION 0 / REWARD 0.1 / Q_MAX 6.494666e+00
----------Random Action----------
TIMESTEP 14653 / STATE explore / EPSILON 0.499224821733 / ACTIO

TIMESTEP 14718 / STATE explore / EPSILON 0.499213990567 / ACTION 0 / REWARD 0.1 / Q_MAX 6.991944e+00
TIMESTEP 14719 / STATE explore / EPSILON 0.499213823933 / ACTION 0 / REWARD 0.1 / Q_MAX 6.918883e+00
TIMESTEP 14720 / STATE explore / EPSILON 0.4992136573 / ACTION 0 / REWARD 0.1 / Q_MAX 6.908569e+00
----------Random Action----------
TIMESTEP 14721 / STATE explore / EPSILON 0.499213490667 / ACTION 0 / REWARD 0.1 / Q_MAX 6.824578e+00
----------Random Action----------
TIMESTEP 14722 / STATE explore / EPSILON 0.499213324033 / ACTION 0 / REWARD 0.1 / Q_MAX 6.746345e+00
TIMESTEP 14723 / STATE explore / EPSILON 0.4992131574 / ACTION 1 / REWARD 0.1 / Q_MAX 6.817090e+00
----------Random Action----------
TIMESTEP 14724 / STATE explore / EPSILON 0.499212990767 / ACTION 0 / REWARD 0.1 / Q_MAX 6.665001e+00
----------Random Action----------
TIMESTEP 14725 / STATE explore / EPSILON 0.499212824133 / ACTION 0 / REWARD 0.1 / Q_MAX 6.482053e+00
----------Random Action----------
TIMESTEP 14726 / STATE exp

TIMESTEP 14791 / STATE explore / EPSILON 0.499201826333 / ACTION 0 / REWARD 0.1 / Q_MAX 6.892240e+00
----------Random Action----------
TIMESTEP 14792 / STATE explore / EPSILON 0.4992016597 / ACTION 1 / REWARD 0.1 / Q_MAX 6.813939e+00
TIMESTEP 14793 / STATE explore / EPSILON 0.499201493067 / ACTION 1 / REWARD 0.1 / Q_MAX 6.738329e+00
----------Random Action----------
TIMESTEP 14794 / STATE explore / EPSILON 0.499201326433 / ACTION 0 / REWARD 0.1 / Q_MAX 6.659557e+00
TIMESTEP 14795 / STATE explore / EPSILON 0.4992011598 / ACTION 1 / REWARD 0.1 / Q_MAX 6.752482e+00
TIMESTEP 14796 / STATE explore / EPSILON 0.499200993167 / ACTION 0 / REWARD 0.1 / Q_MAX 6.966068e+00
TIMESTEP 14797 / STATE explore / EPSILON 0.499200826533 / ACTION 0 / REWARD 0.1 / Q_MAX 7.118233e+00
TIMESTEP 14798 / STATE explore / EPSILON 0.4992006599 / ACTION 0 / REWARD 0.1 / Q_MAX 7.083642e+00
----------Random Action----------
TIMESTEP 14799 / STATE explore / EPSILON 0.499200493267 / ACTION 0 / REWARD 0.1 / Q_MAX 7.019013

----------Random Action----------
TIMESTEP 14866 / STATE explore / EPSILON 0.499189328833 / ACTION 1 / REWARD 0.1 / Q_MAX 5.398088e+00
TIMESTEP 14867 / STATE explore / EPSILON 0.4991891622 / ACTION 1 / REWARD 0.1 / Q_MAX 5.363044e+00
----------Random Action----------
TIMESTEP 14868 / STATE explore / EPSILON 0.499188995567 / ACTION 0 / REWARD 0.1 / Q_MAX 5.138370e+00
----------Random Action----------
TIMESTEP 14869 / STATE explore / EPSILON 0.499188828933 / ACTION 0 / REWARD 0.1 / Q_MAX 5.248876e+00
TIMESTEP 14870 / STATE explore / EPSILON 0.4991886623 / ACTION 0 / REWARD 0.1 / Q_MAX 5.127749e+00
TIMESTEP 14871 / STATE explore / EPSILON 0.499188495667 / ACTION 0 / REWARD 0.1 / Q_MAX 5.062428e+00
TIMESTEP 14872 / STATE explore / EPSILON 0.499188329033 / ACTION 0 / REWARD 0.1 / Q_MAX 4.623595e+00
TIMESTEP 14873 / STATE explore / EPSILON 0.4991881624 / ACTION 0 / REWARD 0.1 / Q_MAX 4.428293e+00
TIMESTEP 14874 / STATE explore / EPSILON 0.499187995767 / ACTION 0 / REWARD 0.1 / Q_MAX 4.400603

TIMESTEP 14941 / STATE explore / EPSILON 0.499176831333 / ACTION 1 / REWARD 0.1 / Q_MAX 4.383188e+00
TIMESTEP 14942 / STATE explore / EPSILON 0.4991766647 / ACTION 0 / REWARD 0.1 / Q_MAX 4.737916e+00
TIMESTEP 14943 / STATE explore / EPSILON 0.499176498067 / ACTION 0 / REWARD 0.1 / Q_MAX 4.963204e+00
----------Random Action----------
TIMESTEP 14944 / STATE explore / EPSILON 0.499176331433 / ACTION 0 / REWARD 0.1 / Q_MAX 4.962326e+00
TIMESTEP 14945 / STATE explore / EPSILON 0.4991761648 / ACTION 0 / REWARD 0.1 / Q_MAX 5.257790e+00
TIMESTEP 14946 / STATE explore / EPSILON 0.499175998167 / ACTION 0 / REWARD 0.1 / Q_MAX 5.363321e+00
----------Random Action----------
TIMESTEP 14947 / STATE explore / EPSILON 0.499175831533 / ACTION 0 / REWARD 0.1 / Q_MAX 5.447213e+00
TIMESTEP 14948 / STATE explore / EPSILON 0.4991756649 / ACTION 0 / REWARD 0.1 / Q_MAX 5.474866e+00
TIMESTEP 14949 / STATE explore / EPSILON 0.499175498267 / ACTION 0 / REWARD 0.1 / Q_MAX 5.252965e+00
----------Random Action------

----------Random Action----------
TIMESTEP 15014 / STATE explore / EPSILON 0.4991646671 / ACTION 0 / REWARD 0.1 / Q_MAX 6.912162e+00
TIMESTEP 15015 / STATE explore / EPSILON 0.499164500467 / ACTION 0 / REWARD 0.1 / Q_MAX 6.811430e+00
TIMESTEP 15016 / STATE explore / EPSILON 0.499164333833 / ACTION 0 / REWARD 0.1 / Q_MAX 6.801949e+00
----------Random Action----------
TIMESTEP 15017 / STATE explore / EPSILON 0.4991641672 / ACTION 0 / REWARD 0.1 / Q_MAX 6.698916e+00
----------Random Action----------
TIMESTEP 15018 / STATE explore / EPSILON 0.499164000567 / ACTION 0 / REWARD 0.1 / Q_MAX 6.624464e+00
TIMESTEP 15019 / STATE explore / EPSILON 0.499163833933 / ACTION 1 / REWARD 0.1 / Q_MAX 6.624678e+00
----------Random Action----------
TIMESTEP 15020 / STATE explore / EPSILON 0.4991636673 / ACTION 0 / REWARD 0.1 / Q_MAX 6.619303e+00
----------Random Action----------
TIMESTEP 15021 / STATE explore / EPSILON 0.499163500667 / ACTION 1 / REWARD 0.1 / Q_MAX 6.383839e+00
TIMESTEP 15022 / STATE explo

TIMESTEP 15086 / STATE explore / EPSILON 0.4991526695 / ACTION 0 / REWARD 0.1 / Q_MAX 5.680039e+00
----------Random Action----------
TIMESTEP 15087 / STATE explore / EPSILON 0.499152502867 / ACTION 0 / REWARD 0.1 / Q_MAX 5.509377e+00
----------Random Action----------
TIMESTEP 15088 / STATE explore / EPSILON 0.499152336233 / ACTION 0 / REWARD 0.1 / Q_MAX 5.384927e+00
TIMESTEP 15089 / STATE explore / EPSILON 0.4991521696 / ACTION 0 / REWARD 0.1 / Q_MAX 5.323336e+00
----------Random Action----------
TIMESTEP 15090 / STATE explore / EPSILON 0.499152002967 / ACTION 0 / REWARD 0.1 / Q_MAX 4.918649e+00
TIMESTEP 15091 / STATE explore / EPSILON 0.499151836333 / ACTION 0 / REWARD 0.1 / Q_MAX 5.079992e+00
----------Random Action----------
TIMESTEP 15092 / STATE explore / EPSILON 0.4991516697 / ACTION 1 / REWARD 0.1 / Q_MAX 4.885359e+00
TIMESTEP 15093 / STATE explore / EPSILON 0.499151503067 / ACTION 0 / REWARD 0.1 / Q_MAX 3.891322e+00
TIMESTEP 15094 / STATE explore / EPSILON 0.499151336433 / ACTI

TIMESTEP 15161 / STATE explore / EPSILON 0.499140172 / ACTION 1 / REWARD 0.1 / Q_MAX 6.676813e+00
TIMESTEP 15162 / STATE explore / EPSILON 0.499140005367 / ACTION 1 / REWARD 0.1 / Q_MAX 6.584109e+00
TIMESTEP 15163 / STATE explore / EPSILON 0.499139838733 / ACTION 0 / REWARD 0.1 / Q_MAX 6.521134e+00
----------Random Action----------
TIMESTEP 15164 / STATE explore / EPSILON 0.4991396721 / ACTION 0 / REWARD 0.1 / Q_MAX 6.507915e+00
----------Random Action----------
TIMESTEP 15165 / STATE explore / EPSILON 0.499139505467 / ACTION 0 / REWARD 0.1 / Q_MAX 6.414560e+00
TIMESTEP 15166 / STATE explore / EPSILON 0.499139338833 / ACTION 0 / REWARD 0.1 / Q_MAX 6.407594e+00
----------Random Action----------
TIMESTEP 15167 / STATE explore / EPSILON 0.4991391722 / ACTION 0 / REWARD 0.1 / Q_MAX 6.478942e+00
TIMESTEP 15168 / STATE explore / EPSILON 0.499139005567 / ACTION 0 / REWARD 0.1 / Q_MAX 6.664683e+00
----------Random Action----------
TIMESTEP 15169 / STATE explore / EPSILON 0.499138838933 / ACTIO

----------Random Action----------
TIMESTEP 15236 / STATE explore / EPSILON 0.4991276745 / ACTION 0 / REWARD 0.1 / Q_MAX 6.333433e+00
TIMESTEP 15237 / STATE explore / EPSILON 0.499127507867 / ACTION 0 / REWARD 0.1 / Q_MAX 6.337943e+00
----------Random Action----------
TIMESTEP 15238 / STATE explore / EPSILON 0.499127341233 / ACTION 0 / REWARD 0.1 / Q_MAX 6.256062e+00
----------Random Action----------
TIMESTEP 15239 / STATE explore / EPSILON 0.4991271746 / ACTION 0 / REWARD 0.1 / Q_MAX 6.168312e+00
----------Random Action----------
TIMESTEP 15240 / STATE explore / EPSILON 0.499127007967 / ACTION 0 / REWARD 0.1 / Q_MAX 5.875888e+00
----------Random Action----------
TIMESTEP 15241 / STATE explore / EPSILON 0.499126841333 / ACTION 0 / REWARD 0.1 / Q_MAX 5.732960e+00
----------Random Action----------
TIMESTEP 15242 / STATE explore / EPSILON 0.4991266747 / ACTION 0 / REWARD 0.1 / Q_MAX 5.620440e+00
----------Random Action----------
TIMESTEP 15243 / STATE explore / EPSILON 0.499126508067 / ACT

TIMESTEP 15308 / STATE explore / EPSILON 0.4991156769 / ACTION 0 / REWARD 0.1 / Q_MAX 4.966732e+00
TIMESTEP 15309 / STATE explore / EPSILON 0.499115510267 / ACTION 1 / REWARD 0.1 / Q_MAX 4.691188e+00
TIMESTEP 15310 / STATE explore / EPSILON 0.499115343633 / ACTION 1 / REWARD 0.1 / Q_MAX 4.905823e+00
TIMESTEP 15311 / STATE explore / EPSILON 0.499115177 / ACTION 0 / REWARD 0.1 / Q_MAX 5.237219e+00
TIMESTEP 15312 / STATE explore / EPSILON 0.499115010367 / ACTION 0 / REWARD 0.1 / Q_MAX 4.766017e+00
----------Random Action----------
TIMESTEP 15313 / STATE explore / EPSILON 0.499114843733 / ACTION 0 / REWARD 0.1 / Q_MAX 4.681249e+00
TIMESTEP 15314 / STATE explore / EPSILON 0.4991146771 / ACTION 0 / REWARD 1 / Q_MAX 4.969220e+00
----------Random Action----------
TIMESTEP 15315 / STATE explore / EPSILON 0.499114510467 / ACTION 0 / REWARD 0.1 / Q_MAX 4.464235e+00
TIMESTEP 15316 / STATE explore / EPSILON 0.499114343833 / ACTION 0 / REWARD 0.1 / Q_MAX 4.454885e+00
TIMESTEP 15317 / STATE explore /

TIMESTEP 15383 / STATE explore / EPSILON 0.4991031794 / ACTION 0 / REWARD 0.1 / Q_MAX 6.182950e+00
TIMESTEP 15384 / STATE explore / EPSILON 0.499103012767 / ACTION 0 / REWARD 0.1 / Q_MAX 6.079363e+00
----------Random Action----------
TIMESTEP 15385 / STATE explore / EPSILON 0.499102846133 / ACTION 0 / REWARD 0.1 / Q_MAX 6.034814e+00
----------Random Action----------
TIMESTEP 15386 / STATE explore / EPSILON 0.4991026795 / ACTION 0 / REWARD 0.1 / Q_MAX 5.923225e+00
----------Random Action----------
TIMESTEP 15387 / STATE explore / EPSILON 0.499102512867 / ACTION 0 / REWARD 0.1 / Q_MAX 5.790619e+00
TIMESTEP 15388 / STATE explore / EPSILON 0.499102346233 / ACTION 0 / REWARD 0.1 / Q_MAX 5.785378e+00
----------Random Action----------
TIMESTEP 15389 / STATE explore / EPSILON 0.4991021796 / ACTION 0 / REWARD 0.1 / Q_MAX 5.551615e+00
----------Random Action----------
TIMESTEP 15390 / STATE explore / EPSILON 0.499102012967 / ACTION 1 / REWARD 0.1 / Q_MAX 5.348593e+00
----------Random Action-----

TIMESTEP 15456 / STATE explore / EPSILON 0.499091015167 / ACTION 0 / REWARD 0.1 / Q_MAX 4.352308e+00
TIMESTEP 15457 / STATE explore / EPSILON 0.499090848533 / ACTION 0 / REWARD 0.1 / Q_MAX 4.808023e+00
TIMESTEP 15458 / STATE explore / EPSILON 0.4990906819 / ACTION 0 / REWARD 0.1 / Q_MAX 4.767240e+00
TIMESTEP 15459 / STATE explore / EPSILON 0.499090515267 / ACTION 0 / REWARD 0.1 / Q_MAX 4.994022e+00
----------Random Action----------
TIMESTEP 15460 / STATE explore / EPSILON 0.499090348633 / ACTION 0 / REWARD 0.1 / Q_MAX 5.267566e+00
----------Random Action----------
TIMESTEP 15461 / STATE explore / EPSILON 0.499090182 / ACTION 0 / REWARD 0.1 / Q_MAX 5.113704e+00
----------Random Action----------
TIMESTEP 15462 / STATE explore / EPSILON 0.499090015367 / ACTION 0 / REWARD 0.1 / Q_MAX 5.317016e+00
----------Random Action----------
TIMESTEP 15463 / STATE explore / EPSILON 0.499089848733 / ACTION 0 / REWARD 1 / Q_MAX 5.504181e+00
TIMESTEP 15464 / STATE explore / EPSILON 0.4990896821 / ACTION 

TIMESTEP 15528 / STATE explore / EPSILON 0.499079017567 / ACTION 0 / REWARD 0.1 / Q_MAX 3.943737e+00
TIMESTEP 15529 / STATE explore / EPSILON 0.499078850933 / ACTION 0 / REWARD 0.1 / Q_MAX 1.986366e+00
----------Random Action----------
TIMESTEP 15530 / STATE explore / EPSILON 0.4990786843 / ACTION 0 / REWARD 0.1 / Q_MAX 2.004670e+00
----------Random Action----------
TIMESTEP 15531 / STATE explore / EPSILON 0.499078517667 / ACTION 0 / REWARD 0.1 / Q_MAX 3.423355e+00
----------Random Action----------
TIMESTEP 15532 / STATE explore / EPSILON 0.499078351033 / ACTION 0 / REWARD 1 / Q_MAX 2.876294e+00
TIMESTEP 15533 / STATE explore / EPSILON 0.4990781844 / ACTION 0 / REWARD 0.1 / Q_MAX 2.342033e+00
TIMESTEP 15534 / STATE explore / EPSILON 0.499078017767 / ACTION 0 / REWARD 0.1 / Q_MAX 2.293488e+00
----------Random Action----------
TIMESTEP 15535 / STATE explore / EPSILON 0.499077851133 / ACTION 0 / REWARD -1 / Q_MAX 2.316886e+00
TIMESTEP 15536 / STATE explore / EPSILON 0.4990776845 / ACTION 

TIMESTEP 15602 / STATE explore / EPSILON 0.4990666867 / ACTION 0 / REWARD 0.1 / Q_MAX 5.055084e+00
TIMESTEP 15603 / STATE explore / EPSILON 0.499066520067 / ACTION 0 / REWARD 0.1 / Q_MAX 5.032609e+00
TIMESTEP 15604 / STATE explore / EPSILON 0.499066353433 / ACTION 0 / REWARD 0.1 / Q_MAX 5.195364e+00
TIMESTEP 15605 / STATE explore / EPSILON 0.4990661868 / ACTION 0 / REWARD 0.1 / Q_MAX 5.171210e+00
TIMESTEP 15606 / STATE explore / EPSILON 0.499066020167 / ACTION 0 / REWARD 0.1 / Q_MAX 5.220966e+00
TIMESTEP 15607 / STATE explore / EPSILON 0.499065853533 / ACTION 0 / REWARD 0.1 / Q_MAX 5.185746e+00
TIMESTEP 15608 / STATE explore / EPSILON 0.4990656869 / ACTION 0 / REWARD 0.1 / Q_MAX 4.930135e+00
----------Random Action----------
TIMESTEP 15609 / STATE explore / EPSILON 0.499065520267 / ACTION 0 / REWARD 0.1 / Q_MAX 5.001981e+00
----------Random Action----------
TIMESTEP 15610 / STATE explore / EPSILON 0.499065353633 / ACTION 0 / REWARD 0.1 / Q_MAX 4.969699e+00
----------Random Action------

TIMESTEP 15680 / STATE explore / EPSILON 0.4990536893 / ACTION 0 / REWARD 0.1 / Q_MAX 4.524663e+00
----------Random Action----------
TIMESTEP 15681 / STATE explore / EPSILON 0.499053522667 / ACTION 0 / REWARD 0.1 / Q_MAX 5.048858e+00
----------Random Action----------
TIMESTEP 15682 / STATE explore / EPSILON 0.499053356033 / ACTION 0 / REWARD 1 / Q_MAX 5.328785e+00
----------Random Action----------
TIMESTEP 15683 / STATE explore / EPSILON 0.4990531894 / ACTION 1 / REWARD 0.1 / Q_MAX 4.775260e+00
----------Random Action----------
TIMESTEP 15684 / STATE explore / EPSILON 0.499053022767 / ACTION 0 / REWARD 0.1 / Q_MAX 1.871518e+00
----------Random Action----------
TIMESTEP 15685 / STATE explore / EPSILON 0.499052856133 / ACTION 0 / REWARD -1 / Q_MAX -3.081208e+00
TIMESTEP 15686 / STATE explore / EPSILON 0.4990526895 / ACTION 0 / REWARD 0.1 / Q_MAX 7.039968e+00
TIMESTEP 15687 / STATE explore / EPSILON 0.499052522867 / ACTION 0 / REWARD 0.1 / Q_MAX 7.153253e+00
TIMESTEP 15688 / STATE explore

TIMESTEP 15754 / STATE explore / EPSILON 0.499041358433 / ACTION 0 / REWARD 0.1 / Q_MAX 6.625917e+00
TIMESTEP 15755 / STATE explore / EPSILON 0.4990411918 / ACTION 0 / REWARD 0.1 / Q_MAX 6.626475e+00
TIMESTEP 15756 / STATE explore / EPSILON 0.499041025167 / ACTION 0 / REWARD 0.1 / Q_MAX 6.617587e+00
TIMESTEP 15757 / STATE explore / EPSILON 0.499040858533 / ACTION 0 / REWARD 0.1 / Q_MAX 6.768884e+00
----------Random Action----------
TIMESTEP 15758 / STATE explore / EPSILON 0.4990406919 / ACTION 1 / REWARD 0.1 / Q_MAX 6.882508e+00
----------Random Action----------
TIMESTEP 15759 / STATE explore / EPSILON 0.499040525267 / ACTION 0 / REWARD 0.1 / Q_MAX 6.738161e+00
TIMESTEP 15760 / STATE explore / EPSILON 0.499040358633 / ACTION 0 / REWARD 0.1 / Q_MAX 6.788705e+00
----------Random Action----------
TIMESTEP 15761 / STATE explore / EPSILON 0.499040192 / ACTION 0 / REWARD 0.1 / Q_MAX 6.870348e+00
----------Random Action----------
TIMESTEP 15762 / STATE explore / EPSILON 0.499040025367 / ACTIO

TIMESTEP 15828 / STATE explore / EPSILON 0.499029027567 / ACTION 0 / REWARD 0.1 / Q_MAX 4.533294e+00
----------Random Action----------
TIMESTEP 15829 / STATE explore / EPSILON 0.499028860933 / ACTION 0 / REWARD 0.1 / Q_MAX 4.737321e+00
TIMESTEP 15830 / STATE explore / EPSILON 0.4990286943 / ACTION 0 / REWARD 0.1 / Q_MAX 4.738317e+00
----------Random Action----------
TIMESTEP 15831 / STATE explore / EPSILON 0.499028527667 / ACTION 0 / REWARD 0.1 / Q_MAX 4.886960e+00
----------Random Action----------
TIMESTEP 15832 / STATE explore / EPSILON 0.499028361033 / ACTION 0 / REWARD -1 / Q_MAX 4.618833e+00
----------Random Action----------
TIMESTEP 15833 / STATE explore / EPSILON 0.4990281944 / ACTION 0 / REWARD 0.1 / Q_MAX 7.245947e+00
----------Random Action----------
TIMESTEP 15834 / STATE explore / EPSILON 0.499028027767 / ACTION 0 / REWARD 0.1 / Q_MAX 7.278892e+00
TIMESTEP 15835 / STATE explore / EPSILON 0.499027861133 / ACTION 0 / REWARD 0.1 / Q_MAX 7.042270e+00
TIMESTEP 15836 / STATE expl

TIMESTEP 15901 / STATE explore / EPSILON 0.499016863333 / ACTION 0 / REWARD 0.1 / Q_MAX 4.827344e+00
----------Random Action----------
TIMESTEP 15902 / STATE explore / EPSILON 0.4990166967 / ACTION 0 / REWARD 0.1 / Q_MAX 4.906598e+00
----------Random Action----------
TIMESTEP 15903 / STATE explore / EPSILON 0.499016530067 / ACTION 0 / REWARD 0.1 / Q_MAX 5.025721e+00
----------Random Action----------
TIMESTEP 15904 / STATE explore / EPSILON 0.499016363433 / ACTION 0 / REWARD 0.1 / Q_MAX 5.087280e+00
----------Random Action----------
TIMESTEP 15905 / STATE explore / EPSILON 0.4990161968 / ACTION 0 / REWARD 0.1 / Q_MAX 4.873772e+00
TIMESTEP 15906 / STATE explore / EPSILON 0.499016030167 / ACTION 0 / REWARD 0.1 / Q_MAX 4.948494e+00
TIMESTEP 15907 / STATE explore / EPSILON 0.499015863533 / ACTION 0 / REWARD 0.1 / Q_MAX 4.951797e+00
TIMESTEP 15908 / STATE explore / EPSILON 0.4990156969 / ACTION 0 / REWARD 0.1 / Q_MAX 5.087811e+00
TIMESTEP 15909 / STATE explore / EPSILON 0.499015530267 / ACTI

TIMESTEP 15973 / STATE explore / EPSILON 0.499004865733 / ACTION 0 / REWARD 0.1 / Q_MAX 7.249632e+00
----------Random Action----------
TIMESTEP 15974 / STATE explore / EPSILON 0.4990046991 / ACTION 0 / REWARD 0.1 / Q_MAX 7.181746e+00
TIMESTEP 15975 / STATE explore / EPSILON 0.499004532467 / ACTION 0 / REWARD 0.1 / Q_MAX 7.250360e+00
----------Random Action----------
TIMESTEP 15976 / STATE explore / EPSILON 0.499004365833 / ACTION 0 / REWARD 0.1 / Q_MAX 7.258081e+00
TIMESTEP 15977 / STATE explore / EPSILON 0.4990041992 / ACTION 0 / REWARD 0.1 / Q_MAX 7.343096e+00
----------Random Action----------
TIMESTEP 15978 / STATE explore / EPSILON 0.499004032567 / ACTION 0 / REWARD 0.1 / Q_MAX 7.362655e+00
----------Random Action----------
TIMESTEP 15979 / STATE explore / EPSILON 0.499003865933 / ACTION 0 / REWARD 0.1 / Q_MAX 7.289054e+00
TIMESTEP 15980 / STATE explore / EPSILON 0.4990036993 / ACTION 0 / REWARD 0.1 / Q_MAX 7.258994e+00
----------Random Action----------
TIMESTEP 15981 / STATE explo

TIMESTEP 16048 / STATE explore / EPSILON 0.498992368233 / ACTION 0 / REWARD 0.1 / Q_MAX 6.804598e+00
TIMESTEP 16049 / STATE explore / EPSILON 0.4989922016 / ACTION 0 / REWARD 0.1 / Q_MAX 6.831409e+00
----------Random Action----------
TIMESTEP 16050 / STATE explore / EPSILON 0.498992034967 / ACTION 0 / REWARD 0.1 / Q_MAX 6.709865e+00
TIMESTEP 16051 / STATE explore / EPSILON 0.498991868333 / ACTION 0 / REWARD 0.1 / Q_MAX 6.530013e+00
TIMESTEP 16052 / STATE explore / EPSILON 0.4989917017 / ACTION 0 / REWARD 0.1 / Q_MAX 6.250938e+00
----------Random Action----------
TIMESTEP 16053 / STATE explore / EPSILON 0.498991535067 / ACTION 1 / REWARD 0.1 / Q_MAX 6.173879e+00
----------Random Action----------
TIMESTEP 16054 / STATE explore / EPSILON 0.498991368433 / ACTION 1 / REWARD 0.1 / Q_MAX 5.572953e+00
TIMESTEP 16055 / STATE explore / EPSILON 0.4989912018 / ACTION 0 / REWARD 0.1 / Q_MAX 4.894275e+00
TIMESTEP 16056 / STATE explore / EPSILON 0.498991035167 / ACTION 0 / REWARD 0.1 / Q_MAX 5.207639

TIMESTEP 16121 / STATE explore / EPSILON 0.498980204 / ACTION 0 / REWARD 0.1 / Q_MAX 4.892140e+00
----------Random Action----------
TIMESTEP 16122 / STATE explore / EPSILON 0.498980037367 / ACTION 0 / REWARD 0.1 / Q_MAX 4.900556e+00
TIMESTEP 16123 / STATE explore / EPSILON 0.498979870733 / ACTION 0 / REWARD 0.1 / Q_MAX 4.761232e+00
----------Random Action----------
TIMESTEP 16124 / STATE explore / EPSILON 0.4989797041 / ACTION 0 / REWARD 0.1 / Q_MAX 4.685619e+00
TIMESTEP 16125 / STATE explore / EPSILON 0.498979537467 / ACTION 0 / REWARD 0.1 / Q_MAX 4.498698e+00
----------Random Action----------
TIMESTEP 16126 / STATE explore / EPSILON 0.498979370833 / ACTION 0 / REWARD 0.1 / Q_MAX 4.460619e+00
TIMESTEP 16127 / STATE explore / EPSILON 0.4989792042 / ACTION 0 / REWARD 0.1 / Q_MAX 4.464712e+00
TIMESTEP 16128 / STATE explore / EPSILON 0.498979037567 / ACTION 0 / REWARD 0.1 / Q_MAX 4.388371e+00
----------Random Action----------
TIMESTEP 16129 / STATE explore / EPSILON 0.498978870933 / ACTIO