In [1]:
#!/usr/bin/env python
# Environment setup: imports, GPU selection, and a device/version report.
# `from __future__` has to be the first statement so this code is also valid
# when executed as a plain script rather than cell-by-cell in IPython.
from __future__ import print_function
import pygame
import os
# Restrict TensorFlow to GPU 0. The variable name must be spelled exactly
# CUDA_VISIBLE_DEVICES, otherwise the setting is silently ignored. It is set
# before TensorFlow is imported so it is in place when devices are initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
import tensorflow as tf
import cv2
import sys
# The game emulator module lives in the local "game/" directory.
sys.path.append("game/")
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque
from tensorflow.python.client import device_lib
# Report which compute devices TensorFlow can see, and the library version.
print(device_lib.list_local_devices())
print('tensorflow version: ',tf.__version__)


pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 3037498224429934671
]
tensorflow version:  1.10.0


In [2]:
import pygame
from __future__ import print_function
import numpy as np
from collections import deque
import cv2

In [3]:
# ---- Run configuration and DQN hyper-parameters ----
GAME = 'bird'              # game identifier, used when naming log/checkpoint files
ACTIONS = 2                # size of the discrete action space

# Learning schedule
GAMMA = 0.99               # discount applied to the estimated future reward
OBSERVE = 10000.0          # steps spent only collecting experience before training starts
EXPLORE = 3000000.0        # steps over which epsilon is annealed

# Epsilon-greedy exploration bounds
INITIAL_EPSILON = 0.5      # epsilon at the start of the run
FINAL_EPSILON = 1e-4       # epsilon stops decreasing once it reaches this value

# Experience replay
REPLAY_MEMORY = 50000      # maximum number of stored transitions
BATCH = 32                 # transitions sampled per gradient step

FRAME_PER_ACTION = 1       # a new action is chosen every this many frames

In [4]:
def weight_variable(shape):
    """Return a `tf.Variable` of the given shape for use as layer weights.

    The initial values are sampled from a truncated normal distribution
    with a standard deviation of 0.01.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev = 0.01))

In [5]:
def bias_variable(shape):
    """Return a `tf.Variable` of the given shape for use as a layer bias.

    Every element starts at the constant value 0.01.
    """
    return tf.Variable(tf.constant(0.01, shape = shape))

In [6]:
def conv2d(x, W, stride):
    """Apply a 2-D convolution of filter `W` to `x` with "SAME" padding.

    `stride` is used for both inner dimensions of the strides vector; the
    first and last entries are fixed at 1.
    """
    window_strides = [1, stride, stride, 1]
    return tf.nn.conv2d(x, W, strides = window_strides, padding = "SAME")

In [7]:
def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window and a stride of 2, using "SAME" padding."""
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize = pool_window, strides = pool_strides, padding = "SAME")

In [8]:
def createNetwork():
    """Build the convolutional Q-network graph.

    Layout: three convolution layers (8x8 stride 4, 4x4 stride 2, 3x3
    stride 1) with one 2x2 max-pool after the first, followed by a
    512-unit fully connected layer and a linear read-out with one value
    per action.

    Returns:
        A tuple ``(s, readout, h_fc1)``: the input placeholder of shape
        [None, 80, 80, 4], the read-out tensor with ACTIONS columns, and
        the activations of the fully connected hidden layer.
    """
    # Parameters are created first, in a fixed order. NOTE(review): keep this
    # order stable -- variables get auto-generated names, so reordering would
    # presumably stop previously saved checkpoints from matching.
    conv1_kernel = weight_variable([8, 8, 4, 32])
    conv1_bias = bias_variable([32])

    conv2_kernel = weight_variable([4, 4, 32, 64])
    conv2_bias = bias_variable([64])

    conv3_kernel = weight_variable([3, 3, 64, 64])
    conv3_bias = bias_variable([64])

    fc1_weights = weight_variable([1600, 512])
    fc1_bias = bias_variable([512])

    out_weights = weight_variable([512, ACTIONS])
    out_bias = bias_variable([ACTIONS])

    # Network input: a batch of 80x80 images with 4 channels.
    state_input = tf.placeholder("float", [None, 80, 80, 4])

    # Convolutional stack; only the first convolution is followed by pooling.
    conv1_out = tf.nn.relu(conv2d(state_input, conv1_kernel, 4) + conv1_bias)
    pool1_out = max_pool_2x2(conv1_out)
    conv2_out = tf.nn.relu(conv2d(pool1_out, conv2_kernel, 2) + conv2_bias)
    conv3_out = tf.nn.relu(conv2d(conv2_out, conv3_kernel, 1) + conv3_bias)

    # Flatten each example to a 1600-element vector for the dense layer.
    conv3_flat = tf.reshape(conv3_out, [-1, 1600])
    fc1_out = tf.nn.relu(tf.matmul(conv3_flat, fc1_weights) + fc1_bias)

    # Linear read-out: one value per action.
    q_values = tf.matmul(fc1_out, out_weights) + out_bias

    return state_input, q_values, fc1_out

In [9]:
def _preprocess_frame(frame):
    """Shrink a raw game frame to 80x80, convert it to grayscale and binarise it.

    Pixels with intensity above 1 become 255, all others 0.
    """
    gray = cv2.cvtColor(cv2.resize(frame, (80, 80)), cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    return binary


def trainNetwork(s, readout, h_fc1, sess):
    """Run the deep Q-learning loop against the Flappy Bird emulator.

    The function builds the loss and optimiser, restores the latest
    checkpoint from ``saved_networks`` if one exists, and then plays the
    game indefinitely with an epsilon-greedy policy. Transitions are
    stored in a bounded replay buffer; once more than OBSERVE steps have
    elapsed, a random minibatch is used for one Adam step per frame. A
    checkpoint is written every 10000 steps and one status line is
    printed per step.

    Args:
        s: input placeholder of the network (fed with stacked 80x80x4 frames).
        readout: tensor holding one Q-value per action.
        h_fc1: hidden-layer activations; accepted for interface
            compatibility but not used here.
        sess: TensorFlow session in which all graph operations are run.

    Returns:
        Never returns normally: the loop has no exit condition and runs
        until the process is interrupted or an exception is raised.
    """
    # Loss: squared error between the target `y` and the Q-value of the
    # action actually taken (selected through the one-hot mask `a`).
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # open up a game state to communicate with emulator
    game_state = game.GameState()

    # Replay memory; `maxlen` makes the deque discard the oldest transition
    # automatically once REPLAY_MEMORY entries are stored.
    D = deque(maxlen=int(REPLAY_MEMORY))

    # NOTE: no log files are opened here. Per-step debug logging is disabled,
    # and opening the files for writing would only truncate existing logs.

    # Get the first state by doing nothing; the initial 4-frame stack is the
    # same preprocessed frame repeated four times.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    first_frame, _, _ = game_state.frame_step(do_nothing)
    x_t = _preprocess_frame(first_frame)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    # The saver needs the target directory to exist; create it up front so
    # the first checkpoint (after 10000 steps) cannot fail for that reason.
    if not os.path.isdir("saved_networks"):
        os.makedirs("saved_networks")

    # start training
    epsilon = INITIAL_EPSILON
    t = 0

    while True:
        # choose an action epsilon greedily
        readout_t = sess.run(readout, feed_dict={s: [s_t]})[0]
        a_t = np.zeros([ACTIONS])
        action_index = 0  # default: do nothing (also used on skipped frames)
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print("----------Random Action----------")
                # Random exploration is biased: action 1 is chosen only 10%
                # of the time, action 0 otherwise.
                action_index = 1 if random.random() < 0.1 else 0
            else:
                action_index = np.argmax(readout_t)
        a_t[action_index] = 1

        # scale down epsilon
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe next state and reward
        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)
        x_t1 = np.reshape(_preprocess_frame(x_t1_colored), (80, 80, 1))
        # The newest frame goes first; the three most recent old frames follow.
        s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)

        # store the transition in D
        D.append((s_t, a_t, r_t, s_t1, terminal))

        # Only train once observing is done and a full minibatch can be drawn.
        if t > OBSERVE and len(D) >= BATCH:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            # get the batch variables
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            # Q-learning targets: the reward alone for terminal transitions,
            # otherwise reward plus the discounted best next-state Q-value.
            readout_j1_batch = sess.run(readout, feed_dict={s: s_j1_batch})
            y_batch = []
            for transition, q_next in zip(minibatch, readout_j1_batch):
                reward_j = transition[2]
                terminal_j = transition[4]
                if terminal_j:
                    y_batch.append(reward_j)
                else:
                    y_batch.append(reward_j + GAMMA * np.max(q_next))

            # perform gradient step
            sess.run(train_step, feed_dict={
                y: y_batch,
                a: a_batch,
                s: s_j_batch,
            })

        # update the old values
        s_t = s_t1
        t += 1

        # save progress every 10000 iterations
        if t % 10000 == 0:
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step=t)

        # print info
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, \
            "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
            "/ Q_MAX %e" % np.max(readout_t))

In [None]:
def playGame():
    """Create a session, build the Q-network and run the training loop.

    The interactive session is always closed when training stops, including
    when it is interrupted (e.g. by a kernel interrupt) or fails with an
    exception, so its resources are released instead of leaking.
    """
    sess = tf.InteractiveSession()
    try:
        s, readout, h_fc1 = createNetwork()
        trainNetwork(s, readout, h_fc1, sess)
    finally:
        sess.close()

def main():
    """Entry point: delegate to `playGame`, which sets up and runs training."""
    playGame()

# Launch training when this code is executed directly. In a notebook kernel
# __name__ is "__main__" as well, so running this cell starts main().
if __name__ == "__main__":
    main()

Instructions for updating:
Use `tf.global_variables_initializer` instead.
INFO:tensorflow:Restoring parameters from saved_networks\bird-dqn-10000
Successfully loaded: saved_networks\bird-dqn-10000
TIMESTEP 1 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215087e+01
----------Random Action----------
TIMESTEP 2 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204418e+01
TIMESTEP 3 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191034e+01
----------Random Action----------
TIMESTEP 4 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184067e+01
----------Random Action----------
TIMESTEP 5 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187737e+01
----------Random Action----------
TIMESTEP 6 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.189903e+01
----------Random Action----------
TIMESTEP 7 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182255e+01
TIMESTEP 8 / STATE observe / EP

TIMESTEP 80 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207721e+01
----------Random Action----------
TIMESTEP 81 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224220e+01
----------Random Action----------
TIMESTEP 82 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229432e+01
TIMESTEP 83 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222490e+01
TIMESTEP 84 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227765e+01
----------Random Action----------
TIMESTEP 85 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232348e+01
----------Random Action----------
TIMESTEP 86 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.236263e+01
----------Random Action----------
TIMESTEP 87 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225242e+01
TIMESTEP 88 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225109e+01
TIMESTEP 89 / STATE observe / EPSILON 0.5 / ACT

TIMESTEP 160 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189099e+01
TIMESTEP 161 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189276e+01
----------Random Action----------
TIMESTEP 162 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191957e+01
----------Random Action----------
TIMESTEP 163 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192028e+01
TIMESTEP 164 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.197691e+01
TIMESTEP 165 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.195647e+01
TIMESTEP 166 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196325e+01
TIMESTEP 167 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.195385e+01
----------Random Action----------
TIMESTEP 168 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192622e+01
TIMESTEP 169 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201404e+01
TIMESTEP 170 / STA

TIMESTEP 242 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231382e+01
TIMESTEP 243 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238040e+01
----------Random Action----------
TIMESTEP 244 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.237913e+01
TIMESTEP 245 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223048e+01
----------Random Action----------
TIMESTEP 246 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222825e+01
----------Random Action----------
TIMESTEP 247 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234337e+01
----------Random Action----------
TIMESTEP 248 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235933e+01
TIMESTEP 249 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229477e+01
----------Random Action----------
TIMESTEP 250 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.248460e+01
----------Random Action----------
TIME

TIMESTEP 324 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.228464e+01
----------Random Action----------
TIMESTEP 325 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236585e+01
TIMESTEP 326 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229594e+01
----------Random Action----------
TIMESTEP 327 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.226132e+01
----------Random Action----------
TIMESTEP 328 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.225210e+01
TIMESTEP 329 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227143e+01
----------Random Action----------
TIMESTEP 330 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225937e+01
TIMESTEP 331 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219966e+01
----------Random Action----------
TIMESTEP 332 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.220248e+01
TIMESTEP 333 / STATE observe / EPSILON

----------Random Action----------
TIMESTEP 406 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205538e+01
TIMESTEP 407 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209303e+01
TIMESTEP 408 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.205595e+01
----------Random Action----------
TIMESTEP 409 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208781e+01
TIMESTEP 410 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.211572e+01
----------Random Action----------
TIMESTEP 411 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222150e+01
----------Random Action----------
TIMESTEP 412 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219582e+01
TIMESTEP 413 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214821e+01
----------Random Action----------
TIMESTEP 414 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213554e+01
TIMESTEP 415 / STATE observe / EPSILON

TIMESTEP 485 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215911e+01
TIMESTEP 486 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226319e+01
TIMESTEP 487 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225800e+01
----------Random Action----------
TIMESTEP 488 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220878e+01
TIMESTEP 489 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227238e+01
----------Random Action----------
TIMESTEP 490 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221142e+01
----------Random Action----------
TIMESTEP 491 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222311e+01
----------Random Action----------
TIMESTEP 492 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218465e+01
TIMESTEP 493 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.221488e+01
TIMESTEP 494 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_M

TIMESTEP 566 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.288774e+01
TIMESTEP 567 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.298961e+01
----------Random Action----------
TIMESTEP 568 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.308741e+01
TIMESTEP 569 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.307359e+01
TIMESTEP 570 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.311965e+01
TIMESTEP 571 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231373e+01
TIMESTEP 572 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212142e+01
----------Random Action----------
TIMESTEP 573 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.218605e+01
TIMESTEP 574 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215689e+01
----------Random Action----------
TIMESTEP 575 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181846e+01
----------Random Act

TIMESTEP 645 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185747e+01
TIMESTEP 646 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194210e+01
TIMESTEP 647 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194069e+01
----------Random Action----------
TIMESTEP 648 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193989e+01
----------Random Action----------
TIMESTEP 649 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192487e+01
----------Random Action----------
TIMESTEP 650 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.195557e+01
----------Random Action----------
TIMESTEP 651 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188706e+01
----------Random Action----------
TIMESTEP 652 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185024e+01
TIMESTEP 653 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189392e+01
----------Random Action----------
TIME

TIMESTEP 726 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263764e+01
TIMESTEP 727 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263159e+01
----------Random Action----------
TIMESTEP 728 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.258003e+01
TIMESTEP 729 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.260866e+01
----------Random Action----------
TIMESTEP 730 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259227e+01
TIMESTEP 731 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265619e+01
----------Random Action----------
TIMESTEP 732 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.264478e+01
----------Random Action----------
TIMESTEP 733 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273207e+01
TIMESTEP 734 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.275043e+01
TIMESTEP 735 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_M

TIMESTEP 811 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.287859e+01
TIMESTEP 812 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.281433e+01
----------Random Action----------
TIMESTEP 813 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.581430e+00
TIMESTEP 814 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.016193e+00
----------Random Action----------
TIMESTEP 815 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -3.404869e+00
----------Random Action----------
TIMESTEP 816 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189845e+01
----------Random Action----------
TIMESTEP 817 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.188972e+01
TIMESTEP 818 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184865e+01
TIMESTEP 819 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184603e+01
TIMESTEP 820 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX

TIMESTEP 892 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232132e+01
----------Random Action----------
TIMESTEP 893 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237595e+01
----------Random Action----------
TIMESTEP 894 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239947e+01
TIMESTEP 895 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240709e+01
----------Random Action----------
TIMESTEP 896 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243383e+01
TIMESTEP 897 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259008e+01
----------Random Action----------
TIMESTEP 898 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261492e+01
TIMESTEP 899 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.260132e+01
TIMESTEP 900 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.255150e+01
TIMESTEP 901 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_M

TIMESTEP 972 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.279427e+01
----------Random Action----------
TIMESTEP 973 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.280052e+01
----------Random Action----------
TIMESTEP 974 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.289645e+01
TIMESTEP 975 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.286542e+01
----------Random Action----------
TIMESTEP 976 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.295849e+01
----------Random Action----------
TIMESTEP 977 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214490e+01
TIMESTEP 978 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197534e+01
----------Random Action----------
TIMESTEP 979 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208373e+01
----------Random Action----------
TIMESTEP 980 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203740e+01
TIMEST

----------Random Action----------
TIMESTEP 1053 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223684e+01
----------Random Action----------
TIMESTEP 1054 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231637e+01
----------Random Action----------
TIMESTEP 1055 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239417e+01
----------Random Action----------
TIMESTEP 1056 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240650e+01
TIMESTEP 1057 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245049e+01
TIMESTEP 1058 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.254869e+01
TIMESTEP 1059 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.256774e+01
TIMESTEP 1060 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.256240e+01
----------Random Action----------
TIMESTEP 1061 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257585e+01
----------Random Action------

TIMESTEP 1133 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.260359e+01
----------Random Action----------
TIMESTEP 1134 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257934e+01
----------Random Action----------
TIMESTEP 1135 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264958e+01
----------Random Action----------
TIMESTEP 1136 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271005e+01
TIMESTEP 1137 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.277810e+01
TIMESTEP 1138 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.280196e+01
----------Random Action----------
TIMESTEP 1139 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285081e+01
TIMESTEP 1140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.293307e+01
----------Random Action----------
TIMESTEP 1141 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.284795e+01
TIMESTEP 1142 / STATE observe

TIMESTEP 1213 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183396e+01
TIMESTEP 1214 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192839e+01
TIMESTEP 1215 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188560e+01
----------Random Action----------
TIMESTEP 1216 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184814e+01
TIMESTEP 1217 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183718e+01
TIMESTEP 1218 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185437e+01
TIMESTEP 1219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190447e+01
----------Random Action----------
TIMESTEP 1220 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190908e+01
----------Random Action----------
TIMESTEP 1221 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190833e+01
TIMESTEP 1222 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.187231e+01
--------

TIMESTEP 1294 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210002e+01
TIMESTEP 1295 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220688e+01
TIMESTEP 1296 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227604e+01
----------Random Action----------
TIMESTEP 1297 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219796e+01
TIMESTEP 1298 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226367e+01
TIMESTEP 1299 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223142e+01
----------Random Action----------
TIMESTEP 1300 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.224272e+01
TIMESTEP 1301 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216026e+01
TIMESTEP 1302 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207593e+01
----------Random Action----------
TIMESTEP 1303 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192553e+01
--------

----------Random Action----------
TIMESTEP 1373 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187320e+01
TIMESTEP 1374 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182952e+01
----------Random Action----------
TIMESTEP 1375 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187029e+01
TIMESTEP 1376 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191355e+01
TIMESTEP 1377 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190809e+01
----------Random Action----------
TIMESTEP 1378 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191943e+01
----------Random Action----------
TIMESTEP 1379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187847e+01
TIMESTEP 1380 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.184164e+01
----------Random Action----------
TIMESTEP 1381 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186189e+01
----------Random Action------

TIMESTEP 1450 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.231950e+01
----------Random Action----------
TIMESTEP 1451 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233117e+01
----------Random Action----------
TIMESTEP 1452 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239475e+01
----------Random Action----------
TIMESTEP 1453 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241212e+01
TIMESTEP 1454 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.234894e+01
TIMESTEP 1455 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239228e+01
TIMESTEP 1456 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234877e+01
TIMESTEP 1457 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240146e+01
TIMESTEP 1458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241103e+01
----------Random Action----------
TIMESTEP 1459 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD

----------Random Action----------
TIMESTEP 1530 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199082e+01
TIMESTEP 1531 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196495e+01
----------Random Action----------
TIMESTEP 1532 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194759e+01
TIMESTEP 1533 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.195923e+01
----------Random Action----------
TIMESTEP 1534 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200696e+01
TIMESTEP 1535 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.202139e+01
----------Random Action----------
TIMESTEP 1536 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211296e+01
TIMESTEP 1537 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209941e+01
----------Random Action----------
TIMESTEP 1538 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211350e+01
TIMESTEP 1539 / STATE observe

TIMESTEP 1610 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193827e+01
----------Random Action----------
TIMESTEP 1611 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192334e+01
----------Random Action----------
TIMESTEP 1612 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197830e+01
----------Random Action----------
TIMESTEP 1613 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201246e+01
----------Random Action----------
TIMESTEP 1614 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200173e+01
TIMESTEP 1615 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205229e+01
----------Random Action----------
TIMESTEP 1616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211619e+01
----------Random Action----------
TIMESTEP 1617 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209527e+01
----------Random Action----------
TIMESTEP 1618 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 1688 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.255302e+01
----------Random Action----------
TIMESTEP 1689 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261287e+01
----------Random Action----------
TIMESTEP 1690 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261429e+01
----------Random Action----------
TIMESTEP 1691 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269595e+01
TIMESTEP 1692 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.275623e+01
----------Random Action----------
TIMESTEP 1693 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.276268e+01
----------Random Action----------
TIMESTEP 1694 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.279591e+01
TIMESTEP 1695 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.284201e+01
TIMESTEP 1696 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.297735e+01
----------Random Action------

TIMESTEP 1769 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228813e+01
TIMESTEP 1770 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233173e+01
----------Random Action----------
TIMESTEP 1771 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230550e+01
----------Random Action----------
TIMESTEP 1772 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232285e+01
----------Random Action----------
TIMESTEP 1773 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229415e+01
----------Random Action----------
TIMESTEP 1774 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232309e+01
TIMESTEP 1775 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231454e+01
TIMESTEP 1776 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.235265e+01
----------Random Action----------
TIMESTEP 1777 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237096e+01
----------Random Action------

TIMESTEP 1848 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210068e+01
----------Random Action----------
TIMESTEP 1849 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221931e+01
TIMESTEP 1850 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226213e+01
----------Random Action----------
TIMESTEP 1851 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227585e+01
TIMESTEP 1852 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230250e+01
TIMESTEP 1853 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218749e+01
----------Random Action----------
TIMESTEP 1854 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222527e+01
TIMESTEP 1855 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212280e+01
----------Random Action----------
TIMESTEP 1856 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219145e+01
TIMESTEP 1857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 1928 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274393e+01
----------Random Action----------
TIMESTEP 1929 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272735e+01
TIMESTEP 1930 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.269172e+01
TIMESTEP 1931 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.265089e+01
----------Random Action----------
TIMESTEP 1932 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261812e+01
TIMESTEP 1933 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.271958e+01
----------Random Action----------
TIMESTEP 1934 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269810e+01
TIMESTEP 1935 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273331e+01
----------Random Action----------
TIMESTEP 1936 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274927e+01
TIMESTEP 1937 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

----------Random Action----------
TIMESTEP 2007 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194729e+01
----------Random Action----------
TIMESTEP 2008 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.201687e+01
TIMESTEP 2009 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.194235e+01
TIMESTEP 2010 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.206022e+01
----------Random Action----------
TIMESTEP 2011 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205548e+01
TIMESTEP 2012 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201689e+01
TIMESTEP 2013 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200089e+01
----------Random Action----------
TIMESTEP 2014 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200312e+01
----------Random Action----------
TIMESTEP 2015 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206099e+01
----------Random Action------

TIMESTEP 2086 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274062e+01
TIMESTEP 2087 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271558e+01
TIMESTEP 2088 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.275157e+01
TIMESTEP 2089 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.265354e+01
TIMESTEP 2090 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273036e+01
----------Random Action----------
TIMESTEP 2091 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.267879e+01
TIMESTEP 2092 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266975e+01
TIMESTEP 2093 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268406e+01
----------Random Action----------
TIMESTEP 2094 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272063e+01
----------Random Action----------
TIMESTEP 2095 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272077e+01
TIMESTEP

TIMESTEP 2171 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233786e+01
----------Random Action----------
TIMESTEP 2172 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237487e+01
----------Random Action----------
TIMESTEP 2173 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237231e+01
TIMESTEP 2174 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253145e+01
----------Random Action----------
TIMESTEP 2175 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.258812e+01
----------Random Action----------
TIMESTEP 2176 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263695e+01
TIMESTEP 2177 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259979e+01
TIMESTEP 2178 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.262324e+01
----------Random Action----------
TIMESTEP 2179 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266088e+01
TIMESTEP 2180 / STATE observe

TIMESTEP 2250 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185896e+01
----------Random Action----------
TIMESTEP 2251 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190449e+01
----------Random Action----------
TIMESTEP 2252 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193051e+01
----------Random Action----------
TIMESTEP 2253 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192684e+01
----------Random Action----------
TIMESTEP 2254 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197905e+01
TIMESTEP 2255 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204403e+01
TIMESTEP 2256 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207566e+01
TIMESTEP 2257 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208920e+01
TIMESTEP 2258 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219818e+01
TIMESTEP 2259 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 2331 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245722e+01
TIMESTEP 2332 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247827e+01
----------Random Action----------
TIMESTEP 2333 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.254384e+01
TIMESTEP 2334 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259684e+01
TIMESTEP 2335 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259850e+01
TIMESTEP 2336 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272445e+01
TIMESTEP 2337 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274561e+01
----------Random Action----------
TIMESTEP 2338 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270197e+01
TIMESTEP 2339 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266569e+01
----------Random Action----------
TIMESTEP 2340 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266912e+01
--------

TIMESTEP 2411 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231521e+01
----------Random Action----------
TIMESTEP 2412 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230324e+01
TIMESTEP 2413 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231387e+01
TIMESTEP 2414 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231761e+01
TIMESTEP 2415 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230964e+01
TIMESTEP 2416 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.230034e+01
TIMESTEP 2417 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225348e+01
----------Random Action----------
TIMESTEP 2418 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225245e+01
----------Random Action----------
TIMESTEP 2419 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231904e+01
TIMESTEP 2420 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234198e+01
--------

TIMESTEP 2493 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223352e+01
TIMESTEP 2494 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218961e+01
TIMESTEP 2495 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212339e+01
TIMESTEP 2496 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215463e+01
----------Random Action----------
TIMESTEP 2497 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216028e+01
TIMESTEP 2498 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219879e+01
TIMESTEP 2499 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225394e+01
TIMESTEP 2500 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231153e+01
TIMESTEP 2501 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234996e+01
TIMESTEP 2502 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237328e+01
----------Random Action----------
TIMESTEP 2503 / STATE observe / EPSILON 0.

TIMESTEP 2574 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221010e+01
----------Random Action----------
TIMESTEP 2575 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200718e+01
----------Random Action----------
TIMESTEP 2576 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203778e+01
----------Random Action----------
TIMESTEP 2577 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209958e+01
TIMESTEP 2578 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217109e+01
TIMESTEP 2579 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218329e+01
TIMESTEP 2580 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229763e+01
TIMESTEP 2581 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224236e+01
----------Random Action----------
TIMESTEP 2582 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232468e+01
----------Random Action----------
TIMESTEP 2583 / STATE observe

TIMESTEP 2653 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199805e+01
TIMESTEP 2654 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.198485e+01
----------Random Action----------
TIMESTEP 2655 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202295e+01
----------Random Action----------
TIMESTEP 2656 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.196370e+01
----------Random Action----------
TIMESTEP 2657 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.173497e+01
----------Random Action----------
TIMESTEP 2658 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179437e+01
----------Random Action----------
TIMESTEP 2659 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.178236e+01
TIMESTEP 2660 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.149187e+01
----------Random Action----------
TIMESTEP 2661 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.185784

TIMESTEP 2732 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264535e+01
----------Random Action----------
TIMESTEP 2733 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264602e+01
TIMESTEP 2734 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.276668e+01
TIMESTEP 2735 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.268543e+01
----------Random Action----------
TIMESTEP 2736 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261396e+01
----------Random Action----------
TIMESTEP 2737 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265155e+01
----------Random Action----------
TIMESTEP 2738 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263585e+01
TIMESTEP 2739 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274222e+01
TIMESTEP 2740 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274726e+01
----------Random Action----------
TIMESTEP 2741 / STATE observe

TIMESTEP 2811 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -1.868138e+00
----------Random Action----------
TIMESTEP 2812 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.022620e+00
----------Random Action----------
TIMESTEP 2813 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.611585e+00
TIMESTEP 2814 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 3.529375e+00
----------Random Action----------
TIMESTEP 2815 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.351470e+00
----------Random Action----------
TIMESTEP 2816 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 5.264062e+00
----------Random Action----------
TIMESTEP 2817 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.924251e+00
----------Random Action----------
TIMESTEP 2818 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.904141e+00
----------Random Action----------
TIMESTEP 2819 / STATE observe / EPSILON 0.5 / ACT

TIMESTEP 2890 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217281e+01
TIMESTEP 2891 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218640e+01
TIMESTEP 2892 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222893e+01
TIMESTEP 2893 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218033e+01
----------Random Action----------
TIMESTEP 2894 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216975e+01
----------Random Action----------
TIMESTEP 2895 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218931e+01
TIMESTEP 2896 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216741e+01
----------Random Action----------
TIMESTEP 2897 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219621e+01
TIMESTEP 2898 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214754e+01
----------Random Action----------
TIMESTEP 2899 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 2974 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.170819e+01
----------Random Action----------
TIMESTEP 2975 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.177091e+01
----------Random Action----------
TIMESTEP 2976 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.175088e+01
----------Random Action----------
TIMESTEP 2977 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181781e+01
TIMESTEP 2978 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192402e+01
----------Random Action----------
TIMESTEP 2979 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199201e+01
----------Random Action----------
TIMESTEP 2980 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201872e+01
TIMESTEP 2981 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202290e+01
----------Random Action----------
TIMESTEP 2982 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207704

----------Random Action----------
TIMESTEP 3055 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232419e+01
----------Random Action----------
TIMESTEP 3056 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231267e+01
----------Random Action----------
TIMESTEP 3057 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233048e+01
TIMESTEP 3058 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239213e+01
TIMESTEP 3059 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245429e+01
TIMESTEP 3060 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251257e+01
TIMESTEP 3061 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253578e+01
TIMESTEP 3062 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.255261e+01
TIMESTEP 3063 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261151e+01
----------Random Action----------
TIMESTEP 3064 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 3138 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193474e+01
----------Random Action----------
TIMESTEP 3139 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192233e+01
----------Random Action----------
TIMESTEP 3140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194586e+01
----------Random Action----------
TIMESTEP 3141 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194906e+01
TIMESTEP 3142 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.194642e+01
----------Random Action----------
TIMESTEP 3143 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194397e+01
----------Random Action----------
TIMESTEP 3144 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193566e+01
----------Random Action----------
TIMESTEP 3145 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187650e+01
----------Random Action----------
TIMESTEP 3146 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 3216 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234923e+01
TIMESTEP 3217 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.231499e+01
TIMESTEP 3218 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235651e+01
----------Random Action----------
TIMESTEP 3219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237701e+01
TIMESTEP 3220 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239496e+01
TIMESTEP 3221 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241306e+01
TIMESTEP 3222 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244304e+01
----------Random Action----------
TIMESTEP 3223 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245209e+01
----------Random Action----------
TIMESTEP 3224 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243507e+01
----------Random Action----------
TIMESTEP 3225 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 3297 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193940e+01
TIMESTEP 3298 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195316e+01
TIMESTEP 3299 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201103e+01
TIMESTEP 3300 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201255e+01
TIMESTEP 3301 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.204260e+01
TIMESTEP 3302 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200458e+01
TIMESTEP 3303 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200076e+01
TIMESTEP 3304 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195573e+01
TIMESTEP 3305 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201516e+01
----------Random Action----------
TIMESTEP 3306 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207666e+01
TIMESTEP 3307 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 

TIMESTEP 3377 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231739e+01
----------Random Action----------
TIMESTEP 3378 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236287e+01
----------Random Action----------
TIMESTEP 3379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227585e+01
----------Random Action----------
TIMESTEP 3380 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227541e+01
TIMESTEP 3381 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220111e+01
TIMESTEP 3382 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218717e+01
----------Random Action----------
TIMESTEP 3383 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216236e+01
TIMESTEP 3384 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215000e+01
TIMESTEP 3385 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218635e+01
TIMESTEP 3386 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 3455 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215088e+01
TIMESTEP 3456 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210452e+01
----------Random Action----------
TIMESTEP 3457 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214243e+01
TIMESTEP 3458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210318e+01
TIMESTEP 3459 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214831e+01
----------Random Action----------
TIMESTEP 3460 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215659e+01
----------Random Action----------
TIMESTEP 3461 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226325e+01
----------Random Action----------
TIMESTEP 3462 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229313e+01
----------Random Action----------
TIMESTEP 3463 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228523e+01
TIMESTEP 3464 / STATE observe

TIMESTEP 3534 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223278e+01
TIMESTEP 3535 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.224824e+01
TIMESTEP 3536 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.234810e+01
----------Random Action----------
TIMESTEP 3537 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234636e+01
----------Random Action----------
TIMESTEP 3538 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232902e+01
----------Random Action----------
TIMESTEP 3539 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235745e+01
----------Random Action----------
TIMESTEP 3540 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243768e+01
----------Random Action----------
TIMESTEP 3541 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.248370e+01
TIMESTEP 3542 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.255678e+01
TIMESTEP 3543 / STATE observe

TIMESTEP 3614 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194870e+01
TIMESTEP 3615 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195898e+01
TIMESTEP 3616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200012e+01
TIMESTEP 3617 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204703e+01
----------Random Action----------
TIMESTEP 3618 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196911e+01
TIMESTEP 3619 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.194577e+01
TIMESTEP 3620 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196600e+01
----------Random Action----------
TIMESTEP 3621 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203933e+01
TIMESTEP 3622 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202996e+01
TIMESTEP 3623 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.198979e+01
TIMESTEP 3624 / STATE observe / EPSILON 0.

TIMESTEP 3699 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.267888e+01
----------Random Action----------
TIMESTEP 3700 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268879e+01
----------Random Action----------
TIMESTEP 3701 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.275043e+01
TIMESTEP 3702 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 9.607197e+00
----------Random Action----------
TIMESTEP 3703 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX -1.393379e-01
TIMESTEP 3704 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196753e+01
TIMESTEP 3705 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191209e+01
----------Random Action----------
TIMESTEP 3706 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195778e+01
TIMESTEP 3707 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196218e+01
TIMESTEP 3708 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 3778 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227903e+01
----------Random Action----------
TIMESTEP 3779 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223860e+01
TIMESTEP 3780 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228763e+01
TIMESTEP 3781 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229550e+01
----------Random Action----------
TIMESTEP 3782 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225538e+01
----------Random Action----------
TIMESTEP 3783 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231327e+01
----------Random Action----------
TIMESTEP 3784 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215681e+01
TIMESTEP 3785 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.224608e+01
TIMESTEP 3786 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.216343e+01
TIMESTEP 3787 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD

----------Random Action----------
TIMESTEP 3857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237807e+01
TIMESTEP 3858 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235334e+01
TIMESTEP 3859 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233264e+01
----------Random Action----------
TIMESTEP 3860 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.242918e+01
----------Random Action----------
TIMESTEP 3861 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249645e+01
TIMESTEP 3862 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.250678e+01
TIMESTEP 3863 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266809e+01
----------Random Action----------
TIMESTEP 3864 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268097e+01
----------Random Action----------
TIMESTEP 3865 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268936e+01
TIMESTEP 3866 / STATE observe

TIMESTEP 3935 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195366e+01
----------Random Action----------
TIMESTEP 3936 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198650e+01
----------Random Action----------
TIMESTEP 3937 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.192732e+01
----------Random Action----------
TIMESTEP 3938 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.185541e+01
TIMESTEP 3939 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.206330e+01
----------Random Action----------
TIMESTEP 3940 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205970e+01
----------Random Action----------
TIMESTEP 3941 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213413e+01
----------Random Action----------
TIMESTEP 3942 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212726e+01
TIMESTEP 3943 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.213978

TIMESTEP 4015 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202251e+01
TIMESTEP 4016 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211611e+01
----------Random Action----------
TIMESTEP 4017 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206432e+01
TIMESTEP 4018 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224448e+01
TIMESTEP 4019 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229618e+01
TIMESTEP 4020 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220785e+01
----------Random Action----------
TIMESTEP 4021 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227484e+01
TIMESTEP 4022 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233069e+01
----------Random Action----------
TIMESTEP 4023 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235683e+01
----------Random Action----------
TIMESTEP 4024 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 4093 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192578e+01
TIMESTEP 4094 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190211e+01
TIMESTEP 4095 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190212e+01
----------Random Action----------
TIMESTEP 4096 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186436e+01
TIMESTEP 4097 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189099e+01
----------Random Action----------
TIMESTEP 4098 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189276e+01
----------Random Action----------
TIMESTEP 4099 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191957e+01
----------Random Action----------
TIMESTEP 4100 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192028e+01
----------Random Action----------
TIMESTEP 4101 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197691e+01
----------Random Action------

TIMESTEP 4173 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200687e+01
----------Random Action----------
TIMESTEP 4174 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197529e+01
----------Random Action----------
TIMESTEP 4175 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203119e+01
----------Random Action----------
TIMESTEP 4176 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210967e+01
----------Random Action----------
TIMESTEP 4177 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212850e+01
TIMESTEP 4178 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204452e+01
----------Random Action----------
TIMESTEP 4179 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208231e+01
----------Random Action----------
TIMESTEP 4180 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216928e+01
TIMESTEP 4181 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221460

TIMESTEP 4255 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198087e+01
----------Random Action----------
TIMESTEP 4256 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200295e+01
TIMESTEP 4257 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204484e+01
----------Random Action----------
TIMESTEP 4258 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197050e+01
----------Random Action----------
TIMESTEP 4259 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190836e+01
TIMESTEP 4260 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.190072e+01
----------Random Action----------
TIMESTEP 4261 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195211e+01
TIMESTEP 4262 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.200721e+01
----------Random Action----------
TIMESTEP 4263 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207531e+01
TIMESTEP 4264 / STATE observe

TIMESTEP 4334 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -3.080454e+00
TIMESTEP 4335 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -3.657062e+00
----------Random Action----------
TIMESTEP 4336 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -8.815798e-01
----------Random Action----------
TIMESTEP 4337 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.141803e+00
TIMESTEP 4338 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 2.178990e+00
----------Random Action----------
TIMESTEP 4339 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.364544e+00
----------Random Action----------
TIMESTEP 4340 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.569736e+00
----------Random Action----------
TIMESTEP 4341 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 6.271216e+00
TIMESTEP 4342 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 5.566550e+00
----------Random Action---

----------Random Action----------
TIMESTEP 4414 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197723e+01
----------Random Action----------
TIMESTEP 4415 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189991e+01
TIMESTEP 4416 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193211e+01
----------Random Action----------
TIMESTEP 4417 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193898e+01
TIMESTEP 4418 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201292e+01
----------Random Action----------
TIMESTEP 4419 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200463e+01
----------Random Action----------
TIMESTEP 4420 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198567e+01
----------Random Action----------
TIMESTEP 4421 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200298e+01
TIMESTEP 4422 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203230

TIMESTEP 4492 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.224998e+01
----------Random Action----------
TIMESTEP 4493 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229655e+01
TIMESTEP 4494 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.230684e+01
----------Random Action----------
TIMESTEP 4495 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234414e+01
----------Random Action----------
TIMESTEP 4496 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229220e+01
----------Random Action----------
TIMESTEP 4497 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237134e+01
----------Random Action----------
TIMESTEP 4498 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241656e+01
----------Random Action----------
TIMESTEP 4499 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232837e+01
TIMESTEP 4500 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236246

TIMESTEP 4572 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219594e+01
TIMESTEP 4573 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216385e+01
TIMESTEP 4574 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227283e+01
TIMESTEP 4575 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.228502e+01
----------Random Action----------
TIMESTEP 4576 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229847e+01
TIMESTEP 4577 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232024e+01
----------Random Action----------
TIMESTEP 4578 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225804e+01
TIMESTEP 4579 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231745e+01
----------Random Action----------
TIMESTEP 4580 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.231441e+01
TIMESTEP 4581 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228797e+01
TIMESTEP

TIMESTEP 4657 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241085e+01
----------Random Action----------
TIMESTEP 4658 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243024e+01
----------Random Action----------
TIMESTEP 4659 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240927e+01
----------Random Action----------
TIMESTEP 4660 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249000e+01
----------Random Action----------
TIMESTEP 4661 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.250860e+01
----------Random Action----------
TIMESTEP 4662 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.250524e+01
TIMESTEP 4663 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249929e+01
----------Random Action----------
TIMESTEP 4664 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249990e+01
----------Random Action----------
TIMESTEP 4665 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 4735 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203039e+01
----------Random Action----------
TIMESTEP 4736 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208112e+01
----------Random Action----------
TIMESTEP 4737 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.211177e+01
TIMESTEP 4738 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.137374e+01
TIMESTEP 4739 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX -1.838136e+00
----------Random Action----------
TIMESTEP 4740 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198697e+01
----------Random Action----------
TIMESTEP 4741 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179466e+01
----------Random Action----------
TIMESTEP 4742 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190079e+01
TIMESTEP 4743 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189176e+01
TIMESTEP 4744 / STATE observe

TIMESTEP 4814 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.246794e+01
----------Random Action----------
TIMESTEP 4815 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251500e+01
----------Random Action----------
TIMESTEP 4816 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.252821e+01
----------Random Action----------
TIMESTEP 4817 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.258760e+01
----------Random Action----------
TIMESTEP 4818 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.260775e+01
TIMESTEP 4819 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.262067e+01
----------Random Action----------
TIMESTEP 4820 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.260255e+01
----------Random Action----------
TIMESTEP 4821 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274184e+01
----------Random Action----------
TIMESTEP 4822 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 4893 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261560e+01
----------Random Action----------
TIMESTEP 4894 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 6.856355e+00
----------Random Action----------
TIMESTEP 4895 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192772e+01
TIMESTEP 4896 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189858e+01
----------Random Action----------
TIMESTEP 4897 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.186684e+01
----------Random Action----------
TIMESTEP 4898 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185851e+01
TIMESTEP 4899 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.174291e+01
----------Random Action----------
TIMESTEP 4900 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.172268e+01
----------Random Action----------
TIMESTEP 4901 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.175039e

TIMESTEP 4971 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221127e+01
----------Random Action----------
TIMESTEP 4972 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225581e+01
TIMESTEP 4973 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227045e+01
TIMESTEP 4974 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234419e+01
----------Random Action----------
TIMESTEP 4975 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229550e+01
----------Random Action----------
TIMESTEP 4976 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233707e+01
TIMESTEP 4977 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232381e+01
----------Random Action----------
TIMESTEP 4978 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236176e+01
TIMESTEP 4979 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233824e+01
TIMESTEP 4980 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 5052 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259686e+01
----------Random Action----------
TIMESTEP 5053 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.267398e+01
TIMESTEP 5054 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271523e+01
----------Random Action----------
TIMESTEP 5055 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.276869e+01
----------Random Action----------
TIMESTEP 5056 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.280164e+01
----------Random Action----------
TIMESTEP 5057 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.284361e+01
TIMESTEP 5058 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.294796e+01
TIMESTEP 5059 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.295973e+01
----------Random Action----------
TIMESTEP 5060 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.304801e+01
----------Random Action--------

TIMESTEP 5132 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266255e+01
TIMESTEP 5133 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.020996e+01
TIMESTEP 5134 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.134203e+01
----------Random Action----------
TIMESTEP 5135 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 2.247997e+00
----------Random Action----------
TIMESTEP 5136 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -2.062356e+00
TIMESTEP 5137 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182060e+01
----------Random Action----------
TIMESTEP 5138 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192644e+01
----------Random Action----------
TIMESTEP 5139 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192904e+01
----------Random Action----------
TIMESTEP 5140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196218e+01
TIMESTEP 5141 / STATE observe

TIMESTEP 5211 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223560e+01
----------Random Action----------
TIMESTEP 5212 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216265e+01
TIMESTEP 5213 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225212e+01
----------Random Action----------
TIMESTEP 5214 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228756e+01
TIMESTEP 5215 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221976e+01
TIMESTEP 5216 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229349e+01
TIMESTEP 5217 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222880e+01
TIMESTEP 5218 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.227601e+01
----------Random Action----------
TIMESTEP 5219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216834e+01
----------Random Action----------
TIMESTEP 5220 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 5291 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273990e+01
----------Random Action----------
TIMESTEP 5292 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.278011e+01
----------Random Action----------
TIMESTEP 5293 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.288105e+01
----------Random Action----------
TIMESTEP 5294 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.282947e+01
----------Random Action----------
TIMESTEP 5295 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.295053e+01
----------Random Action----------
TIMESTEP 5296 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.292673e+01
----------Random Action----------
TIMESTEP 5297 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.303144e+01
----------Random Action----------
TIMESTEP 5298 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223645e+01
----------Random Action----------
TIMESTEP 5299 / ST

TIMESTEP 5371 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.214245e+01
----------Random Action----------
TIMESTEP 5372 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222732e+01
----------Random Action----------
TIMESTEP 5373 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218011e+01
TIMESTEP 5374 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217693e+01
TIMESTEP 5375 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219491e+01
----------Random Action----------
TIMESTEP 5376 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218770e+01
----------Random Action----------
TIMESTEP 5377 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217414e+01
----------Random Action----------
TIMESTEP 5378 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.216439e+01
TIMESTEP 5379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221910e+01
----------Random Action------

----------Random Action----------
TIMESTEP 5452 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268721e+01
----------Random Action----------
TIMESTEP 5453 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269232e+01
TIMESTEP 5454 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271612e+01
TIMESTEP 5455 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264663e+01
TIMESTEP 5456 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273014e+01
TIMESTEP 5457 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.276674e+01
TIMESTEP 5458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.282742e+01
TIMESTEP 5459 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.283194e+01
TIMESTEP 5460 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.289599e+01
TIMESTEP 5461 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.301887e+01
----------Random Action----------
TIMESTEP

TIMESTEP 5531 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201053e+01
TIMESTEP 5532 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201581e+01
TIMESTEP 5533 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200577e+01
----------Random Action----------
TIMESTEP 5534 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201135e+01
TIMESTEP 5535 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203507e+01
----------Random Action----------
TIMESTEP 5536 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208226e+01
TIMESTEP 5537 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.202536e+01
TIMESTEP 5538 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202353e+01
TIMESTEP 5539 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204832e+01
TIMESTEP 5540 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199867e+01
----------Random Action----------
TIMESTEP

TIMESTEP 5610 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216294e+01
TIMESTEP 5611 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219684e+01
TIMESTEP 5612 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221263e+01
----------Random Action----------
TIMESTEP 5613 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231839e+01
----------Random Action----------
TIMESTEP 5614 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236187e+01
----------Random Action----------
TIMESTEP 5615 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231161e+01
----------Random Action----------
TIMESTEP 5616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241219e+01
TIMESTEP 5617 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236421e+01
TIMESTEP 5618 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236859e+01
----------Random Action----------
TIMESTEP 5619 / STATE observe

TIMESTEP 5693 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.242535e+01
----------Random Action----------
TIMESTEP 5694 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240016e+01
TIMESTEP 5695 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240252e+01
TIMESTEP 5696 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247523e+01
----------Random Action----------
TIMESTEP 5697 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249958e+01
TIMESTEP 5698 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251459e+01
TIMESTEP 5699 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266840e+01
TIMESTEP 5700 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268512e+01
TIMESTEP 5701 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268214e+01
----------Random Action----------
TIMESTEP 5702 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.260367e+01
TIMESTEP

TIMESTEP 5774 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202802e+01
----------Random Action----------
TIMESTEP 5775 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208034e+01
----------Random Action----------
TIMESTEP 5776 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213837e+01
----------Random Action----------
TIMESTEP 5777 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.216240e+01
----------Random Action----------
TIMESTEP 5778 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215873e+01
----------Random Action----------
TIMESTEP 5779 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.197376e+01
----------Random Action----------
TIMESTEP 5780 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.206618e+01
----------Random Action----------
TIMESTEP 5781 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201491e+01
----------Random Action----------
TIMESTEP 5782 / 

----------Random Action----------
TIMESTEP 5854 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.263874e+01
----------Random Action----------
TIMESTEP 5855 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.264861e+01
TIMESTEP 5856 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264117e+01
TIMESTEP 5857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270734e+01
----------Random Action----------
TIMESTEP 5858 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263262e+01
----------Random Action----------
TIMESTEP 5859 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273706e+01
----------Random Action----------
TIMESTEP 5860 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 1.273160e+01
----------Random Action----------
TIMESTEP 5861 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187798e+01
TIMESTEP 5862 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.179050e

TIMESTEP 5932 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234770e+01
TIMESTEP 5933 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238605e+01
----------Random Action----------
TIMESTEP 5934 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236930e+01
TIMESTEP 5935 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.250944e+01
TIMESTEP 5936 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253887e+01
----------Random Action----------
TIMESTEP 5937 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249405e+01
----------Random Action----------
TIMESTEP 5938 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.251386e+01
TIMESTEP 5939 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257268e+01
TIMESTEP 5940 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.255715e+01
----------Random Action----------
TIMESTEP 5941 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 6011 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271945e+01
TIMESTEP 6012 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269809e+01
TIMESTEP 6013 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270885e+01
----------Random Action----------
TIMESTEP 6014 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285334e+01
----------Random Action----------
TIMESTEP 6015 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285256e+01
----------Random Action----------
TIMESTEP 6016 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.289599e+01
TIMESTEP 6017 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214638e+01
TIMESTEP 6018 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191397e+01
----------Random Action----------
TIMESTEP 6019 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190246e+01
TIMESTEP 6020 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0

TIMESTEP 6089 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -9.982400e-02
----------Random Action----------
TIMESTEP 6090 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 7.166432e-02
TIMESTEP 6091 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -3.577349e-01
TIMESTEP 6092 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX -2.057505e+00
----------Random Action----------
TIMESTEP 6093 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190192e+01
----------Random Action----------
TIMESTEP 6094 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194804e+01
----------Random Action----------
TIMESTEP 6095 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194317e+01
TIMESTEP 6096 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191071e+01
TIMESTEP 6097 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192417e+01
TIMESTEP 6098 / STATE observe / EPSILON 0.5 / ACTION 0 / REWA

TIMESTEP 6174 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -1.017254e+01
----------Random Action----------
TIMESTEP 6175 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -7.867816e+00
----------Random Action----------
TIMESTEP 6176 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -6.125205e+00
TIMESTEP 6177 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -4.452178e+00
----------Random Action----------
TIMESTEP 6178 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX -1.877921e+00
----------Random Action----------
TIMESTEP 6179 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183764e+01
----------Random Action----------
TIMESTEP 6180 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183483e+01
TIMESTEP 6181 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189828e+01
TIMESTEP 6182 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194069e+01
TIMESTEP 6183 / STATE obs

TIMESTEP 6255 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.216413e+01
TIMESTEP 6256 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.226874e+01
----------Random Action----------
TIMESTEP 6257 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220209e+01
TIMESTEP 6258 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.223425e+01
----------Random Action----------
TIMESTEP 6259 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217581e+01
TIMESTEP 6260 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.226976e+01
TIMESTEP 6261 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.221624e+01
TIMESTEP 6262 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.227147e+01
----------Random Action----------
TIMESTEP 6263 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228360e+01
----------Random Action----------
TIMESTEP 6264 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD

TIMESTEP 6336 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.278863e+01
----------Random Action----------
TIMESTEP 6337 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.278981e+01
TIMESTEP 6338 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271000e+01
TIMESTEP 6339 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.276407e+01
TIMESTEP 6340 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274383e+01
----------Random Action----------
TIMESTEP 6341 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.276075e+01
----------Random Action----------
TIMESTEP 6342 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270656e+01
----------Random Action----------
TIMESTEP 6343 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.276598e+01
----------Random Action----------
TIMESTEP 6344 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272108e+01
----------Random Action------

TIMESTEP 6419 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206989e+01
----------Random Action----------
TIMESTEP 6420 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212225e+01
TIMESTEP 6421 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.208848e+01
----------Random Action----------
TIMESTEP 6422 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.219976e+01
TIMESTEP 6423 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229343e+01
TIMESTEP 6424 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225634e+01
----------Random Action----------
TIMESTEP 6425 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233070e+01
TIMESTEP 6426 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229758e+01
----------Random Action----------
TIMESTEP 6427 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223863e+01
----------Random Action----------
TIMESTEP 6428 / STATE observe

TIMESTEP 6502 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195640e+01
TIMESTEP 6503 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.188902e+01
TIMESTEP 6504 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200779e+01
TIMESTEP 6505 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199078e+01
TIMESTEP 6506 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198185e+01
----------Random Action----------
TIMESTEP 6507 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.194784e+01
----------Random Action----------
TIMESTEP 6508 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.178671e+01
----------Random Action----------
TIMESTEP 6509 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 8.019894e+00
TIMESTEP 6510 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 6.257667e+00
TIMESTEP 6511 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 5.104805e+00
--------

----------Random Action----------
TIMESTEP 6581 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270714e+01
----------Random Action----------
TIMESTEP 6582 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.268634e+01
----------Random Action----------
TIMESTEP 6583 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268410e+01
----------Random Action----------
TIMESTEP 6584 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.255402e+01
TIMESTEP 6585 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266748e+01
TIMESTEP 6586 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257394e+01
----------Random Action----------
TIMESTEP 6587 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.277244e+01
TIMESTEP 6588 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.279981e+01
----------Random Action----------
TIMESTEP 6589 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285119

TIMESTEP 6664 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198890e+01
----------Random Action----------
TIMESTEP 6665 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.200180e+01
----------Random Action----------
TIMESTEP 6666 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193758e+01
----------Random Action----------
TIMESTEP 6667 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196950e+01
----------Random Action----------
TIMESTEP 6668 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.201582e+01
TIMESTEP 6669 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200521e+01
----------Random Action----------
TIMESTEP 6670 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204555e+01
TIMESTEP 6671 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202621e+01
TIMESTEP 6672 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205853e+01
----------Random Action------

TIMESTEP 6742 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218822e+01
TIMESTEP 6743 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220361e+01
----------Random Action----------
TIMESTEP 6744 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210425e+01
----------Random Action----------
TIMESTEP 6745 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219547e+01
----------Random Action----------
TIMESTEP 6746 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222917e+01
TIMESTEP 6747 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.231331e+01
----------Random Action----------
TIMESTEP 6748 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238110e+01
TIMESTEP 6749 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.225449e+01
TIMESTEP 6750 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228183e+01
----------Random Action----------
TIMESTEP 6751 / STATE observe

TIMESTEP 6817 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.233270e+01
----------Random Action----------
TIMESTEP 6818 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234404e+01
----------Random Action----------
TIMESTEP 6819 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231998e+01
----------Random Action----------
TIMESTEP 6820 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231277e+01
----------Random Action----------
TIMESTEP 6821 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231580e+01
----------Random Action----------
TIMESTEP 6822 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232802e+01
TIMESTEP 6823 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227118e+01
----------Random Action----------
TIMESTEP 6824 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218862e+01
TIMESTEP 6825 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216163

TIMESTEP 6901 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233033e+01
TIMESTEP 6902 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227858e+01
TIMESTEP 6903 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227678e+01
----------Random Action----------
TIMESTEP 6904 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229230e+01
TIMESTEP 6905 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232038e+01
TIMESTEP 6906 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231403e+01
TIMESTEP 6907 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240051e+01
----------Random Action----------
TIMESTEP 6908 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238190e+01
----------Random Action----------
TIMESTEP 6909 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239247e+01
TIMESTEP 6910 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235634e+01
--------

TIMESTEP 6981 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257483e+01
----------Random Action----------
TIMESTEP 6982 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253447e+01
TIMESTEP 6983 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.252175e+01
TIMESTEP 6984 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.254250e+01
TIMESTEP 6985 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.256976e+01
TIMESTEP 6986 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.254507e+01
----------Random Action----------
TIMESTEP 6987 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.261554e+01
TIMESTEP 6988 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265262e+01
TIMESTEP 6989 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272250e+01
----------Random Action----------
TIMESTEP 6990 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274429e+01
--------

TIMESTEP 7061 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 2.797272e+00
----------Random Action----------
TIMESTEP 7062 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 4.059444e+00
TIMESTEP 7063 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 4.348108e+00
TIMESTEP 7064 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 4.084097e+00
TIMESTEP 7065 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 2.755503e+00
----------Random Action----------
TIMESTEP 7066 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 2.452279e+00
----------Random Action----------
TIMESTEP 7067 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.869092e+00
----------Random Action----------
TIMESTEP 7068 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 2.766117e+00
TIMESTEP 7069 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX 2.632213e+00
----------Random Action----------
TIMESTEP 7070 / STATE observe 

TIMESTEP 7140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233320e+01
----------Random Action----------
TIMESTEP 7141 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237056e+01
TIMESTEP 7142 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239384e+01
TIMESTEP 7143 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.238759e+01
TIMESTEP 7144 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244381e+01
----------Random Action----------
TIMESTEP 7145 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249861e+01
----------Random Action----------
TIMESTEP 7146 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245298e+01
----------Random Action----------
TIMESTEP 7147 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247462e+01
----------Random Action----------
TIMESTEP 7148 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244695e+01
----------Random Action------

TIMESTEP 7220 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.181757e+01
TIMESTEP 7221 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190969e+01
----------Random Action----------
TIMESTEP 7222 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187822e+01
----------Random Action----------
TIMESTEP 7223 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191071e+01
TIMESTEP 7224 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192417e+01
----------Random Action----------
TIMESTEP 7225 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194706e+01
----------Random Action----------
TIMESTEP 7226 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194588e+01
TIMESTEP 7227 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198936e+01
TIMESTEP 7228 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200960e+01
----------Random Action----------
TIMESTEP 7229 / STATE observe

TIMESTEP 7304 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222549e+01
TIMESTEP 7305 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223802e+01
----------Random Action----------
TIMESTEP 7306 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230580e+01
TIMESTEP 7307 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232872e+01
TIMESTEP 7308 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236709e+01
TIMESTEP 7309 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.242893e+01
----------Random Action----------
TIMESTEP 7310 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247607e+01
TIMESTEP 7311 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253457e+01
TIMESTEP 7312 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.262264e+01
TIMESTEP 7313 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269149e+01
----------Random Action----------
TIMESTEP

----------Random Action----------
TIMESTEP 7385 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.182890e+01
----------Random Action----------
TIMESTEP 7386 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190957e+01
TIMESTEP 7387 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189048e+01
----------Random Action----------
TIMESTEP 7388 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189007e+01
----------Random Action----------
TIMESTEP 7389 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186521e+01
----------Random Action----------
TIMESTEP 7390 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.186973e+01
TIMESTEP 7391 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190562e+01
TIMESTEP 7392 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192542e+01
TIMESTEP 7393 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191663e+01
TIMESTEP 7394 / STATE observe

TIMESTEP 7464 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.103026e+00
----------Random Action----------
TIMESTEP 7465 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 2.569094e+00
TIMESTEP 7466 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -6.405194e-02
----------Random Action----------
TIMESTEP 7467 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -1.562442e+00
TIMESTEP 7468 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.354148e+00
----------Random Action----------
TIMESTEP 7469 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.857433e+00
----------Random Action----------
TIMESTEP 7470 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 2.666226e+00
TIMESTEP 7471 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184670e+01
----------Random Action----------
TIMESTEP 7472 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191301e+01
----------Random Action-----

----------Random Action----------
TIMESTEP 7543 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216176e+01
----------Random Action----------
TIMESTEP 7544 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.211726e+01
----------Random Action----------
TIMESTEP 7545 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213932e+01
TIMESTEP 7546 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213743e+01
TIMESTEP 7547 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225070e+01
----------Random Action----------
TIMESTEP 7548 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.228451e+01
----------Random Action----------
TIMESTEP 7549 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226868e+01
----------Random Action----------
TIMESTEP 7550 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.240382e+01
----------Random Action----------
TIMESTEP 7551 / STATE observe / EPSILON 0.5 / ACTI

----------Random Action----------
TIMESTEP 7622 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271820e+01
TIMESTEP 7623 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.264990e+01
TIMESTEP 7624 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274586e+01
TIMESTEP 7625 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.279988e+01
TIMESTEP 7626 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285401e+01
----------Random Action----------
TIMESTEP 7627 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.282504e+01
----------Random Action----------
TIMESTEP 7628 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.287251e+01
TIMESTEP 7629 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.295396e+01
TIMESTEP 7630 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.305356e+01
----------Random Action----------
TIMESTEP 7631 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 7702 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236157e+01
----------Random Action----------
TIMESTEP 7703 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272394e+01
----------Random Action----------
TIMESTEP 7704 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.256470e+01
----------Random Action----------
TIMESTEP 7705 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233965e+01
TIMESTEP 7706 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.282636e+01
TIMESTEP 7707 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.283909e+01
TIMESTEP 7708 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.288043e+01
TIMESTEP 7709 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285560e+01
TIMESTEP 7710 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.288402e+01
TIMESTEP 7711 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.283595e+01
--------

TIMESTEP 7783 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247268e+01
TIMESTEP 7784 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240556e+01
----------Random Action----------
TIMESTEP 7785 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247588e+01
----------Random Action----------
TIMESTEP 7786 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253660e+01
TIMESTEP 7787 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.252295e+01
TIMESTEP 7788 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.260435e+01
----------Random Action----------
TIMESTEP 7789 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266131e+01
TIMESTEP 7790 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.271087e+01
----------Random Action----------
TIMESTEP 7791 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263166e+01
----------Random Action----------
TIMESTEP 7792 / STATE observe

TIMESTEP 7863 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212858e+01
TIMESTEP 7864 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205954e+01
----------Random Action----------
TIMESTEP 7865 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209183e+01
TIMESTEP 7866 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217640e+01
TIMESTEP 7867 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221421e+01
TIMESTEP 7868 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220387e+01
----------Random Action----------
TIMESTEP 7869 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221943e+01
TIMESTEP 7870 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221971e+01
TIMESTEP 7871 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218170e+01
----------Random Action----------
TIMESTEP 7872 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227135e+01
--------

TIMESTEP 7944 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.305707e+01
TIMESTEP 7945 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214174e+01
----------Random Action----------
TIMESTEP 7946 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200959e+01
----------Random Action----------
TIMESTEP 7947 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209427e+01
----------Random Action----------
TIMESTEP 7948 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.140225e+01
----------Random Action----------
TIMESTEP 7949 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD -1 / Q_MAX 1.023854e+01
----------Random Action----------
TIMESTEP 7950 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.199670e+01
----------Random Action----------
TIMESTEP 7951 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193762e+01
----------Random Action----------
TIMESTEP 7952 / STATE observe / EPSILON 0.5 / ACTION 

TIMESTEP 8022 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.224678e+01
TIMESTEP 8023 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.210607e+01
----------Random Action----------
TIMESTEP 8024 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221094e+01
TIMESTEP 8025 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.215538e+01
----------Random Action----------
TIMESTEP 8026 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.227066e+01
----------Random Action----------
TIMESTEP 8027 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236870e+01
----------Random Action----------
TIMESTEP 8028 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.229465e+01
----------Random Action----------
TIMESTEP 8029 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234113e+01
----------Random Action----------
TIMESTEP 8030 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231430

TIMESTEP 8102 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.260540e+01
----------Random Action----------
TIMESTEP 8103 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.270684e+01
----------Random Action----------
TIMESTEP 8104 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273136e+01
TIMESTEP 8105 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273986e+01
TIMESTEP 8106 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274320e+01
----------Random Action----------
TIMESTEP 8107 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.275157e+01
TIMESTEP 8108 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.291519e+01
TIMESTEP 8109 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.291468e+01
----------Random Action----------
TIMESTEP 8110 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.297598e+01
----------Random Action----------
TIMESTEP 8111 / STATE observe /

TIMESTEP 8183 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.228138e+01
TIMESTEP 8184 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219631e+01
----------Random Action----------
TIMESTEP 8185 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.216564e+01
----------Random Action----------
TIMESTEP 8186 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 6.802782e+00
TIMESTEP 8187 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -3.675204e-01
TIMESTEP 8188 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -2.957921e+00
----------Random Action----------
TIMESTEP 8189 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -2.700071e+00
TIMESTEP 8190 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -1.083069e-01
TIMESTEP 8191 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 9.220117e-01
----------Random Action----------
TIMESTEP 8192 / STATE observe / EPSILON 0.5 / ACTION 0 / RE

TIMESTEP 8264 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203507e+01
TIMESTEP 8265 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204513e+01
TIMESTEP 8266 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213555e+01
----------Random Action----------
TIMESTEP 8267 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216465e+01
TIMESTEP 8268 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.225306e+01
----------Random Action----------
TIMESTEP 8269 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233947e+01
TIMESTEP 8270 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219247e+01
----------Random Action----------
TIMESTEP 8271 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222145e+01
----------Random Action----------
TIMESTEP 8272 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212027e+01
----------Random Action----------
TIMESTEP 8273 / STATE observe

TIMESTEP 8340 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.280085e+01
TIMESTEP 8341 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.285517e+01
TIMESTEP 8342 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.295021e+01
TIMESTEP 8343 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.311747e+01
----------Random Action----------
TIMESTEP 8344 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.306258e+01
----------Random Action----------
TIMESTEP 8345 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.313560e+01
----------Random Action----------
TIMESTEP 8346 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.235167e+01
TIMESTEP 8347 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212979e+01
TIMESTEP 8348 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214657e+01
----------Random Action----------
TIMESTEP 8349 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0

TIMESTEP 8424 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189374e+01
TIMESTEP 8425 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189007e+01
----------Random Action----------
TIMESTEP 8426 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.184067e+01
----------Random Action----------
TIMESTEP 8427 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187737e+01
TIMESTEP 8428 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.189903e+01
----------Random Action----------
TIMESTEP 8429 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.190411e+01
TIMESTEP 8430 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.191350e+01
TIMESTEP 8431 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.190939e+01
----------Random Action----------
TIMESTEP 8432 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187379e+01
----------Random Action----------
TIMESTEP 8433 / STATE observe

TIMESTEP 8500 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.219336e+01
----------Random Action----------
TIMESTEP 8501 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215317e+01
----------Random Action----------
TIMESTEP 8502 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223424e+01
TIMESTEP 8503 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230489e+01
TIMESTEP 8504 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224811e+01
----------Random Action----------
TIMESTEP 8505 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231351e+01
TIMESTEP 8506 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230283e+01
TIMESTEP 8507 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236372e+01
TIMESTEP 8508 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240398e+01
----------Random Action----------
TIMESTEP 8509 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 8579 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.287206e+01
TIMESTEP 8580 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.301428e+01
----------Random Action----------
TIMESTEP 8581 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.300068e+01
TIMESTEP 8582 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.303250e+01
----------Random Action----------
TIMESTEP 8583 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.221430e+01
----------Random Action----------
TIMESTEP 8584 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.199778e+01
----------Random Action----------
TIMESTEP 8585 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213624e+01
----------Random Action----------
TIMESTEP 8586 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212307e+01
----------Random Action----------
TIMESTEP 8587 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.216656e+

----------Random Action----------
TIMESTEP 8659 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.186291e+01
TIMESTEP 8660 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.207199e+01
----------Random Action----------
TIMESTEP 8661 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207386e+01
----------Random Action----------
TIMESTEP 8662 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214905e+01
----------Random Action----------
TIMESTEP 8663 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213699e+01
----------Random Action----------
TIMESTEP 8664 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215121e+01
TIMESTEP 8665 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.214857e+01
----------Random Action----------
TIMESTEP 8666 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.219443e+01
----------Random Action----------
TIMESTEP 8667 / STATE observe / EPSILON 0.5 / ACTI

TIMESTEP 8736 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266211e+01
----------Random Action----------
TIMESTEP 8737 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274692e+01
TIMESTEP 8738 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.274648e+01
TIMESTEP 8739 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.282485e+01
----------Random Action----------
TIMESTEP 8740 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.288735e+01
TIMESTEP 8741 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.106247e+01
TIMESTEP 8742 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX 5.499800e+00
----------Random Action----------
TIMESTEP 8743 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200807e+01
TIMESTEP 8744 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.185314e+01
TIMESTEP 8745 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.195666e+01
---------

TIMESTEP 8815 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215373e+01
----------Random Action----------
TIMESTEP 8816 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217321e+01
TIMESTEP 8817 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226202e+01
----------Random Action----------
TIMESTEP 8818 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224608e+01
----------Random Action----------
TIMESTEP 8819 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212986e+01
TIMESTEP 8820 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.215775e+01
TIMESTEP 8821 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212103e+01
----------Random Action----------
TIMESTEP 8822 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.213809e+01
TIMESTEP 8823 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.223769e+01
----------Random Action----------
TIMESTEP 8824 / STATE observe

TIMESTEP 8896 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239275e+01
TIMESTEP 8897 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.245176e+01
TIMESTEP 8898 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.230374e+01
TIMESTEP 8899 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.225106e+01
----------Random Action----------
TIMESTEP 8900 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234840e+01
----------Random Action----------
TIMESTEP 8901 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.233977e+01
----------Random Action----------
TIMESTEP 8902 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.232191e+01
----------Random Action----------
TIMESTEP 8903 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 9.701467e+00
TIMESTEP 8904 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -7.022497e+00
----------Random Action----------
TIMESTEP 8905 / STATE observ

TIMESTEP 8978 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226033e+01
----------Random Action----------
TIMESTEP 8979 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.230214e+01
TIMESTEP 8980 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 3.921385e+00
----------Random Action----------
TIMESTEP 8981 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -9.420899e+00
----------Random Action----------
TIMESTEP 8982 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -7.485559e+00
TIMESTEP 8983 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -6.421409e+00
TIMESTEP 8984 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -5.424182e+00
TIMESTEP 8985 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX -2.755055e+00
----------Random Action----------
TIMESTEP 8986 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -7.841675e-01
----------Random Action----------
TIMESTEP 8987 / STATE o

----------Random Action----------
TIMESTEP 9058 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198138e+01
----------Random Action----------
TIMESTEP 9059 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.198394e+01
----------Random Action----------
TIMESTEP 9060 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202905e+01
----------Random Action----------
TIMESTEP 9061 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.204457e+01
TIMESTEP 9062 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.206704e+01
TIMESTEP 9063 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.205453e+01
TIMESTEP 9064 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.209355e+01
----------Random Action----------
TIMESTEP 9065 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207816e+01
----------Random Action----------
TIMESTEP 9066 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.206481

TIMESTEP 9136 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217943e+01
TIMESTEP 9137 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217470e+01
TIMESTEP 9138 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.226735e+01
TIMESTEP 9139 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234909e+01
----------Random Action----------
TIMESTEP 9140 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.232585e+01
TIMESTEP 9141 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.248883e+01
----------Random Action----------
TIMESTEP 9142 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.252526e+01
TIMESTEP 9143 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.250915e+01
----------Random Action----------
TIMESTEP 9144 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247674e+01
----------Random Action----------
TIMESTEP 9145 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 9216 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239001e+01
----------Random Action----------
TIMESTEP 9217 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.239479e+01
TIMESTEP 9218 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236869e+01
----------Random Action----------
TIMESTEP 9219 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.236491e+01
----------Random Action----------
TIMESTEP 9220 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.243949e+01
TIMESTEP 9221 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.246215e+01
----------Random Action----------
TIMESTEP 9222 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243010e+01
TIMESTEP 9223 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.239596e+01
TIMESTEP 9224 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.237490e+01
----------Random Action----------
TIMESTEP 9225 / STATE observe

TIMESTEP 9294 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191194e+01
----------Random Action----------
TIMESTEP 9295 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194157e+01
----------Random Action----------
TIMESTEP 9296 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.203783e+01
TIMESTEP 9297 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207036e+01
----------Random Action----------
TIMESTEP 9298 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.201652e+01
TIMESTEP 9299 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.212803e+01
TIMESTEP 9300 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.220184e+01
----------Random Action----------
TIMESTEP 9301 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222593e+01
----------Random Action----------
TIMESTEP 9302 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224711e+01
----------Random Action------

TIMESTEP 9372 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.279229e+01
TIMESTEP 9373 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.277718e+01
----------Random Action----------
TIMESTEP 9374 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.284641e+01
----------Random Action----------
TIMESTEP 9375 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.290704e+01
TIMESTEP 9376 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196726e+01
----------Random Action----------
TIMESTEP 9377 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX -1.703666e-01
TIMESTEP 9378 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX 1.364849e+00
----------Random Action----------
TIMESTEP 9379 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.187215e+01
TIMESTEP 9380 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192573e+01
TIMESTEP 9381 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

----------Random Action----------
TIMESTEP 9454 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.234650e+01
TIMESTEP 9455 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.231649e+01
----------Random Action----------
TIMESTEP 9456 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.240965e+01
TIMESTEP 9457 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.244591e+01
----------Random Action----------
TIMESTEP 9458 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.248395e+01
----------Random Action----------
TIMESTEP 9459 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.249648e+01
----------Random Action----------
TIMESTEP 9460 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263167e+01
TIMESTEP 9461 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.263476e+01
TIMESTEP 9462 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.266595e+01
----------Random Action------

TIMESTEP 9533 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218786e+01
TIMESTEP 9534 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.273200e+01
TIMESTEP 9535 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.272831e+01
TIMESTEP 9536 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.277345e+01
TIMESTEP 9537 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.295807e+01
TIMESTEP 9538 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.292113e+01
TIMESTEP 9539 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 1 / Q_MAX 1.297491e+01
TIMESTEP 9540 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217189e+01
TIMESTEP 9541 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192463e+01
TIMESTEP 9542 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.200843e+01
----------Random Action----------
TIMESTEP 9543 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.

TIMESTEP 9611 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.050651e+00
TIMESTEP 9612 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.707404e+00
TIMESTEP 9613 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 3.814143e-01
TIMESTEP 9614 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD -1 / Q_MAX 3.633067e+00
----------Random Action----------
TIMESTEP 9615 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.183325e+01
TIMESTEP 9616 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192899e+01
TIMESTEP 9617 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193392e+01
TIMESTEP 9618 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.194069e+01
----------Random Action----------
TIMESTEP 9619 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191483e+01
TIMESTEP 9620 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.193253e+01
TIMESTEP 9621 / STATE observe / EPSILON 0.5

TIMESTEP 9690 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191957e+01
----------Random Action----------
TIMESTEP 9691 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192028e+01
----------Random Action----------
TIMESTEP 9692 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197691e+01
----------Random Action----------
TIMESTEP 9693 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.197813e+01
----------Random Action----------
TIMESTEP 9694 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.192845e+01
----------Random Action----------
TIMESTEP 9695 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.191605e+01
TIMESTEP 9696 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.189286e+01
TIMESTEP 9697 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.187319e+01
TIMESTEP 9698 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.205917e+01
----------Random Action------

TIMESTEP 9770 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.196556e+01
----------Random Action----------
TIMESTEP 9771 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.207006e+01
TIMESTEP 9772 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.202684e+01
----------Random Action----------
TIMESTEP 9773 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217307e+01
----------Random Action----------
TIMESTEP 9774 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.224375e+01
----------Random Action----------
TIMESTEP 9775 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.217628e+01
TIMESTEP 9776 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227366e+01
----------Random Action----------
TIMESTEP 9777 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.228428e+01
----------Random Action----------
TIMESTEP 9778 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227064

TIMESTEP 9848 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.253795e+01
TIMESTEP 9849 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.245920e+01
TIMESTEP 9850 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247231e+01
TIMESTEP 9851 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241191e+01
----------Random Action----------
TIMESTEP 9852 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.243776e+01
----------Random Action----------
TIMESTEP 9853 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.241368e+01
----------Random Action----------
TIMESTEP 9854 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.247729e+01
----------Random Action----------
TIMESTEP 9855 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.250105e+01
TIMESTEP 9856 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.257828e+01
TIMESTEP 9857 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD

TIMESTEP 9929 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.259052e+01
----------Random Action----------
TIMESTEP 9930 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.262229e+01
----------Random Action----------
TIMESTEP 9931 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.274730e+01
----------Random Action----------
TIMESTEP 9932 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.269903e+01
TIMESTEP 9933 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.262952e+01
TIMESTEP 9934 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.265536e+01
----------Random Action----------
TIMESTEP 9935 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268340e+01
TIMESTEP 9936 / STATE observe / EPSILON 0.5 / ACTION 0 / REWARD 0.1 / Q_MAX 1.268543e+01
----------Random Action----------
TIMESTEP 9937 / STATE observe / EPSILON 0.5 / ACTION 1 / REWARD 0.1 / Q_MAX 1.264328e+01
----------Random Action------

TIMESTEP 10007 / STATE explore / EPSILON 0.4999990001999999 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222924e+01
----------Random Action----------
TIMESTEP 10008 / STATE explore / EPSILON 0.4999988335666665 / ACTION 0 / REWARD 0.1 / Q_MAX 1.218882e+01
----------Random Action----------
TIMESTEP 10009 / STATE explore / EPSILON 0.49999866693333317 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222443e+01
----------Random Action----------
TIMESTEP 10010 / STATE explore / EPSILON 0.4999985002999998 / ACTION 0 / REWARD 0.1 / Q_MAX 1.222479e+01
----------Random Action----------
TIMESTEP 10011 / STATE explore / EPSILON 0.49999833366666646 / ACTION 0 / REWARD 0.1 / Q_MAX 1.227346e+01
----------Random Action----------
