In [1]:
import tensorflow as tf
import cv2
import sys
sys.path.append("Wrapped Game Code/")
import pong_fun as game # whichever is imported "as game" will be used
import dummy_game
#import tetris_fun as game
import random
import time 
import numpy as np
from collections import deque

pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
GAME = 'pong' # the name of the game being played for log files
ACTIONS = 6 # number of valid actions
GAMMA = 0.99 # decay rate of past observations
OBSERVE = 500. # timesteps to observe before training
EXPLORE = 500. # frames over which to anneal epsilon
FINAL_EPSILON = 0.05 # final value of epsilon
INITIAL_EPSILON = 1.0 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
K = 1 # only select an action every Kth frame, repeat prev for others

In [None]:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev = 0.01)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.01, shape = shape)
    return tf.Variable(initial)

def conv2d(x, W, stride):
    return tf.nn.conv2d(x, W, strides = [1, stride, stride, 1], padding = "SAME")

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")

def createNetwork():
    # network weights
    W_conv1 = weight_variable([8, 8, 4, 32])
    b_conv1 = bias_variable([32])

    W_conv2 = weight_variable([4, 4, 32, 64])
    b_conv2 = bias_variable([64])

    W_conv3 = weight_variable([3, 3, 64, 64])
    b_conv3 = bias_variable([64])
    
    W_fc1 = weight_variable([1600, 512])
    b_fc1 = bias_variable([512])

    W_fc2 = weight_variable([512, ACTIONS])
    b_fc2 = bias_variable([ACTIONS])

    # input layer
    s = tf.placeholder("float", [None, 80, 80, 4])

    # hidden layers
    h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, 2) + b_conv2)
    #h_pool2 = max_pool_2x2(h_conv2)

    h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1) + b_conv3)
    #h_pool3 = max_pool_2x2(h_conv3)

    #h_pool3_flat = tf.reshape(h_pool3, [-1, 256])
    h_conv3_flat = tf.reshape(h_conv3, [-1, 1600])

    h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1)

    # readout layer
    readout = tf.matmul(h_fc1, W_fc2) + b_fc2

    return s, readout, h_fc1

In [3]:
def trainNetwork(s, readout, h_fc1, sess):
    tick = time.time()
    # define the cost function
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices = 1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # open up a game state to communicate with emulator
    game_state = game.GameState()
    # past 3 wins 
    win_score = []
    win_score.append(0)
    win_score.append(0)
    win_score.append(0)
    win_score.append(0)
    # store the previous observations in replay memory
    D = deque()

    # printing
#     a_file = open("logs_" + GAME + "/readout.txt", 'w')
#     h_file = open("logs_" + GAME + "/hidden.txt", 'w')

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, terminal, bar1_score, bar2_score = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t,1,255,cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
#     checkpoint = tf.train.get_checkpoint_state("saved_networks")
#     if checkpoint and checkpoint.model_checkpoint_path:
#         saver.restore(sess, checkpoint.model_checkpoint_path)
#         print( "Successfully loaded:", checkpoint.model_checkpoint_path)
#     else:
#         print ("Could not find old network weights")

    epsilon = INITIAL_EPSILON
    t = 0
    while "pigs" != "fly":
        # choose an action epsilon greedily
        readout_t = readout.eval(feed_dict = {s : [s_t]})[0]
        a_t = np.zeros([ACTIONS])
        action_index = 0
        if random.random() <= epsilon or t <= OBSERVE:
            action_index = random.randrange(ACTIONS)
            a_t[action_index] = 1
        else:
            action_index = np.argmax(readout_t)
            a_t[action_index] = 1

        # scale down epsilon
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        for i in range(0, K):
            # run the selected action and observe next state and reward
            x_t1_col, r_t, terminal, bar1_score, bar2_score = game_state.frame_step(a_t)
            x_t1 = cv2.cvtColor(cv2.resize(x_t1_col, (80, 80)), cv2.COLOR_BGR2GRAY)
            ret, x_t1 = cv2.threshold(x_t1,1,255,cv2.THRESH_BINARY)
            x_t1 = np.reshape(x_t1, (80, 80, 1))
            s_t1 = np.append(x_t1, s_t[:,:,0:3], axis = 2)

            # store the transition in D
            D.append((s_t, a_t, r_t, s_t1, terminal))
            if len(D) > REPLAY_MEMORY:
                D.popleft()
        
        # only train if done observing
        if t > OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            # get the batch variables
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            r_batch = [d[2] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            y_batch = []
            readout_j1_batch = readout.eval(feed_dict = {s : s_j1_batch})
            for i in range(0, len(minibatch)):
                # if terminal only equals reward
                if minibatch[i][4]:
                    y_batch.append(r_batch[i])
                else:
                    y_batch.append(r_batch[i] + GAMMA * np.max(readout_j1_batch[i]))

            # perform gradient step
            train_step.run(feed_dict = {
                y : y_batch,
                a : a_batch,
                s : s_j_batch})

        # update the old values
        s_t = s_t1
        t += 1

        # save progress every 10000 iterations
        if t % 10000 == 0:
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step = t)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"
        #print ("TIMESTEP", t, "/ STATE", state, "/ LINES", game_state.total_lines, "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, "/ Q_MAX %e" % np.max(readout_t))
        if r_t!= 0:
            print ("TIMESTEP", t, "/ EPSILON", epsilon, "/ bar1_score", bar1_score, "/ bar2_score", bar2_score, "/ REWARD", r_t, "/ Q_MAX %e" % np.max(readout_t))
        
        
        win_score.pop(0)
        win_score.append(bar1_score - bar2_score)
        if(np.matrix(win_score).sum() > 72): #72
            print("Game_Ends_in Time:",int(time.time() - tick))
            break;   
        # write info to files
        '''
        if t % 10000 <= 100:
            a_file.write(",".join([str(x) for x in readout_t]) + '\n')
            h_file.write(",".join([str(x) for x in h_fc1.eval(feed_dict={s:[s_t]})[0]]) + '\n')
            cv2.imwrite("logs_tetris/frame" + str(t) + ".png", x_t1)
        '''

In [4]:
def playGame():
    sess = tf.InteractiveSession()
    s, readout, h_fc1 = createNetwork()
    trainNetwork(s, readout, h_fc1, sess)

In [None]:
def main():
    playGame()

if __name__ == "__main__":
    tick = time.time()
    main()
    print("Game_Ends_in Time:",int(time.time() - tick))
    print("____________ END HERE _____________")

Instructions for updating:
Use `tf.global_variables_initializer` instead.
TIMESTEP 297 / EPSILON 1.0 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 1.345254e-02
TIMESTEP 343 / EPSILON 1.0 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 1.512715e-02
TIMESTEP 389 / EPSILON 1.0 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 1.380204e-02
TIMESTEP 435 / EPSILON 1.0 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 1.366744e-02
TIMESTEP 481 / EPSILON 1.0 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 1.517603e-02
TIMESTEP 527 / EPSILON 0.9505999999999997 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 1.245254e-02
TIMESTEP 573 / EPSILON 0.8631999999999991 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 1.255603e-02
TIMESTEP 619 / EPSILON 0.7757999999999985 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 1.099862e-02
TIMESTEP 665 / EPSILON 0.6883999999999979 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX 1.222987e-02
TIMESTEP 711 / EPSILON 0.6009999999999973 / bar1_score 0

TIMESTEP 5381 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.370636e-01
TIMESTEP 5427 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -3.542329e-01
TIMESTEP 5473 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -3.706146e-01
TIMESTEP 5519 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -3.527355e-01
TIMESTEP 5565 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -3.932435e-01
TIMESTEP 5778 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -3.893628e-01
TIMESTEP 5824 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.285970e-01
TIMESTEP 5870 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -3.454937e-01
TIMESTEP 5916 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -3.518159e-01
TIMESTEP 5962 / EPSILON

TIMESTEP 11226 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -3.589627e-01
TIMESTEP 11442 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -5.092101e-01
TIMESTEP 11488 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -6.181339e-01
TIMESTEP 11701 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -5.339075e-01
TIMESTEP 11747 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.327041e-01
TIMESTEP 11793 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -1.491739e-01
TIMESTEP 11839 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -6.432466e-01
TIMESTEP 11885 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -9.384148e-02
TIMESTEP 11931 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -6.739429e-01
TIMESTEP 1

TIMESTEP 15059 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -7.871455e-02
TIMESTEP 15105 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -8.472098e-01
TIMESTEP 15151 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -1.162039e-01
TIMESTEP 15197 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -9.269905e-02
TIMESTEP 15243 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -2.424206e-01
TIMESTEP 15289 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -7.315724e-02
TIMESTEP 15335 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -7.960382e-01
TIMESTEP 15381 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -8.188834e-01
TIMESTEP 15427 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -1.507670e-01
TIMESTEP 1547

TIMESTEP 18509 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -8.894081e-02
TIMESTEP 18555 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -3.572820e-01
TIMESTEP 18601 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -1.747822e-01
TIMESTEP 18647 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -3.779243e-01
TIMESTEP 18693 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -3.612857e-02
TIMESTEP 18739 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -4.385786e-01
TIMESTEP 18785 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -7.023561e-02
TIMESTEP 18831 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -4.305314e-01
TIMESTEP 18877 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -1.537549e-01
TIMESTEP 18923 / E

TIMESTEP 21959 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -1.092897e+00
TIMESTEP 22005 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -3.613018e-01
TIMESTEP 22051 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -1.326654e+00
TIMESTEP 22097 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -4.328609e-01
TIMESTEP 22143 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.034956e+00
TIMESTEP 22189 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -4.040468e-01
TIMESTEP 22235 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -1.041402e+00
TIMESTEP 22281 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -4.009579e-01
TIMESTEP 22327 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -1.121557e+00
TIMESTEP 22373 /

TIMESTEP 25409 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -6.600122e-01
TIMESTEP 25455 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -1.507963e+00
TIMESTEP 25501 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -7.518764e-01
TIMESTEP 25547 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -1.471992e+00
TIMESTEP 25593 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -7.198005e-01
TIMESTEP 25639 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -1.493876e+00
TIMESTEP 25685 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -6.835884e-01
TIMESTEP 25731 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -1.605701e+00
TIMESTEP 25777 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -6.158115e-01
TIMESTEP 25

TIMESTEP 28859 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -1.392780e+00
TIMESTEP 28905 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -1.058094e+00
TIMESTEP 28951 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -1.338104e+00
TIMESTEP 28997 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -1.004593e+00
TIMESTEP 29043 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -1.363481e+00
TIMESTEP 29089 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -9.521895e-01
TIMESTEP 29135 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -1.327697e+00
TIMESTEP 29181 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -9.654523e-01
TIMESTEP 29227 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -1.341186e+00
TIMESTEP 2927

TIMESTEP 32646 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -1.292392e+00
TIMESTEP 32692 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -1.031996e+00
TIMESTEP 32738 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -1.172622e+00
TIMESTEP 32784 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -1.101790e+00
TIMESTEP 32997 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -1.045589e+00
TIMESTEP 33043 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -8.921987e-01
TIMESTEP 33256 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -1.029334e+00
TIMESTEP 33302 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -9.393488e-01
TIMESTEP 33516 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -1.035291e+00
TIMESTEP 33562 / E

TIMESTEP 41786 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -6.791466e-01
TIMESTEP 41832 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -7.647813e-01
TIMESTEP 42046 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -7.387962e-01
TIMESTEP 42092 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -7.651889e-01
TIMESTEP 42389 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 17 / REWARD 1 / Q_MAX 4.890604e-02
TIMESTEP 42518 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 18 / REWARD -1 / Q_MAX -1.211778e-01
TIMESTEP 42564 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 19 / REWARD -1 / Q_MAX -6.436160e-01
TIMESTEP 42778 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -7.022262e-01
TIMESTEP 42824 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -8.419062e-01
TIMESTEP 43037

TIMESTEP 51100 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -8.924590e-01
TIMESTEP 51146 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -6.521279e-01
TIMESTEP 51359 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -8.836104e-01
TIMESTEP 51405 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -6.400250e-01
TIMESTEP 51618 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -9.164969e-01
TIMESTEP 51664 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -7.125944e-01
TIMESTEP 51878 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -7.005895e-01
TIMESTEP 51924 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -5.765054e-01
TIMESTEP 52137 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -9.554304e-01
TIMESTEP 5218

TIMESTEP 59442 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 10 / REWARD -1 / Q_MAX -1.019994e-02
TIMESTEP 59488 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 11 / REWARD -1 / Q_MAX -5.579950e-01
TIMESTEP 59534 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 12 / REWARD -1 / Q_MAX -4.066872e-01
TIMESTEP 59663 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 12 / REWARD 1 / Q_MAX 4.024611e-01
TIMESTEP 59792 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 13 / REWARD -1 / Q_MAX 5.114017e-02
TIMESTEP 59838 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 14 / REWARD -1 / Q_MAX 6.195529e-03
TIMESTEP 59884 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 15 / REWARD -1 / Q_MAX -5.816295e-01
TIMESTEP 59930 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 16 / REWARD -1 / Q_MAX 1.403752e-01
TIMESTEP 59976 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 17 / REWARD -1 / Q_MAX -4.658187e-01
TIMESTEP 60022 

TIMESTEP 66622 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 7 / REWARD -1 / Q_MAX -7.892672e-01
TIMESTEP 66754 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 7 / REWARD 1 / Q_MAX 6.692050e-01
TIMESTEP 66883 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 8 / REWARD -1 / Q_MAX 3.399673e-01
TIMESTEP 66929 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 9 / REWARD -1 / Q_MAX -2.674078e-01
TIMESTEP 67142 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 10 / REWARD -1 / Q_MAX -8.443569e-01
TIMESTEP 67271 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 10 / REWARD 1 / Q_MAX 4.339577e-01
TIMESTEP 67400 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 11 / REWARD -1 / Q_MAX 1.183467e-01
TIMESTEP 67446 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 12 / REWARD -1 / Q_MAX 1.327848e-01
TIMESTEP 67492 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 13 / REWARD -1 / Q_MAX -4.577345e-01
TIMESTEP 67621 / EPSI

TIMESTEP 73630 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 4 / REWARD -1 / Q_MAX -4.511450e-01
TIMESTEP 73676 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 5 / REWARD -1 / Q_MAX 3.087708e-01
TIMESTEP 73722 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 6 / REWARD -1 / Q_MAX -3.766325e-01
TIMESTEP 73851 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 6 / REWARD 1 / Q_MAX 5.499381e-01
TIMESTEP 73980 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 7 / REWARD -1 / Q_MAX -1.885462e-01
TIMESTEP 74111 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 7 / REWARD 1 / Q_MAX 4.443190e-01
TIMESTEP 74240 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 8 / REWARD -1 / Q_MAX -1.904078e-01
TIMESTEP 74286 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 9 / REWARD -1 / Q_MAX -4.399737e-01
TIMESTEP 74332 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 10 / REWARD -1 / Q_MAX -4.494638e-01
TIMESTEP 74378 / EPSILO

TIMESTEP 80297 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX -2.700760e-01
TIMESTEP 80343 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 3 / REWARD -1 / Q_MAX -5.123739e-01
TIMESTEP 80389 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 4 / REWARD -1 / Q_MAX -3.391448e-01
TIMESTEP 80519 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 4 / REWARD 1 / Q_MAX 7.268149e-01
TIMESTEP 80648 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 5 / REWARD -1 / Q_MAX -3.748708e-01
TIMESTEP 80694 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 6 / REWARD -1 / Q_MAX -4.350485e-01
TIMESTEP 80740 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 7 / REWARD -1 / Q_MAX -2.658522e-01
TIMESTEP 80869 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 7 / REWARD 1 / Q_MAX 7.562843e-01
TIMESTEP 80998 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 8 / REWARD -1 / Q_MAX -2.861078e-01
TIMESTEP 81044 / EPSILO

TIMESTEP 87473 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 5 / REWARD -1 / Q_MAX -1.036367e-01
TIMESTEP 87519 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 6 / REWARD -1 / Q_MAX 3.875211e-01
TIMESTEP 87565 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 7 / REWARD -1 / Q_MAX 3.065316e-02
TIMESTEP 87611 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 8 / REWARD -1 / Q_MAX 3.738653e-01
TIMESTEP 87657 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 9 / REWARD -1 / Q_MAX -3.433836e-02
TIMESTEP 87703 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 10 / REWARD -1 / Q_MAX 3.581603e-01
TIMESTEP 87749 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 11 / REWARD -1 / Q_MAX -2.175536e-01
TIMESTEP 87795 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 12 / REWARD -1 / Q_MAX 3.016780e-01
TIMESTEP 87841 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 13 / REWARD -1 / Q_MAX -2.789164e-01
TIMESTEP 87887 / EPS

TIMESTEP 94649 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 4 / REWARD -1 / Q_MAX -1.753499e-01
TIMESTEP 94778 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 4 / REWARD 1 / Q_MAX 1.138284e+00
TIMESTEP 94907 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 5 / REWARD -1 / Q_MAX -7.434185e-01
TIMESTEP 95039 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 5 / REWARD 1 / Q_MAX 1.213482e+00
TIMESTEP 95168 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 6 / REWARD -1 / Q_MAX -7.196654e-01
TIMESTEP 95299 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 6 / REWARD 1 / Q_MAX 1.266908e+00
TIMESTEP 95428 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 7 / REWARD -1 / Q_MAX -7.047701e-01
TIMESTEP 95560 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 7 / REWARD 1 / Q_MAX 1.150559e+00
TIMESTEP 95689 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 8 / REWARD -1 / Q_MAX -5.807804e-01
TIMESTEP 95735 / EPSILON 0.

TIMESTEP 102322 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 18 / REWARD -1 / Q_MAX -1.432610e-01
TIMESTEP 102451 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 18 / REWARD 1 / Q_MAX 1.279410e+00
TIMESTEP 102580 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 19 / REWARD -1 / Q_MAX -5.700157e-01
TIMESTEP 102712 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 19 / REWARD 1 / Q_MAX 1.341203e+00
TIMESTEP 102841 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -5.813637e-01
TIMESTEP 102972 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 1.799355e+00
TIMESTEP 103101 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX -3.954016e-01
TIMESTEP 103233 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 1.525652e+00
TIMESTEP 103362 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX -5.620992e-01
TIMESTEP 10

TIMESTEP 111035 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 2.057985e+00
TIMESTEP 111164 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX -3.258449e-01
TIMESTEP 111293 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 2 / REWARD 1 / Q_MAX 1.910777e+00
TIMESTEP 111422 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 3 / REWARD -1 / Q_MAX -5.831599e-01
TIMESTEP 111552 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 3 / REWARD 1 / Q_MAX 1.754088e+00
TIMESTEP 111681 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 4 / REWARD -1 / Q_MAX -4.079845e-01
TIMESTEP 111810 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 4 / REWARD 1 / Q_MAX 1.881579e+00
TIMESTEP 111939 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 5 / REWARD -1 / Q_MAX -4.545042e-01
TIMESTEP 112071 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 5 / REWARD 1 / Q_MAX 1.760192e+00
TIMESTEP 112200 / EP

TIMESTEP 120094 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 4 / REWARD -1 / Q_MAX -3.859937e-01
TIMESTEP 120223 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 4 / REWARD 1 / Q_MAX 1.990239e+00
TIMESTEP 120352 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 5 / REWARD -1 / Q_MAX -4.983242e-01
TIMESTEP 120481 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 5 / REWARD 1 / Q_MAX 2.090320e+00
TIMESTEP 120610 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 6 / REWARD -1 / Q_MAX -3.826030e-01
TIMESTEP 120741 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 6 / REWARD 1 / Q_MAX 2.021922e+00
TIMESTEP 120870 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 7 / REWARD -1 / Q_MAX -3.928563e-01
TIMESTEP 120999 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 7 / REWARD 1 / Q_MAX 2.120566e+00
TIMESTEP 121128 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 8 / REWARD -1 / Q_MAX -3.382554e-01
TIMESTEP 121260 / 

TIMESTEP 129468 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 6 / REWARD 1 / Q_MAX 2.332121e+00
TIMESTEP 129597 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 7 / REWARD -1 / Q_MAX -9.857967e-02
TIMESTEP 129643 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 8 / REWARD -1 / Q_MAX 7.989220e-01
TIMESTEP 129689 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 9 / REWARD -1 / Q_MAX 5.959949e-01
TIMESTEP 129818 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 9 / REWARD 1 / Q_MAX 2.558337e+00
TIMESTEP 129947 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 10 / REWARD -1 / Q_MAX -3.288923e-01
TIMESTEP 130077 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 10 / REWARD 1 / Q_MAX 1.714626e+00
TIMESTEP 130206 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 11 / REWARD -1 / Q_MAX -2.847468e-01
TIMESTEP 130338 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 11 / REWARD 1 / Q_MAX 1.806270e+00
TIMESTEP 130467 

TIMESTEP 138182 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 11 / REWARD -1 / Q_MAX -1.454263e-01
TIMESTEP 138311 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 11 / REWARD 1 / Q_MAX 2.445649e+00
TIMESTEP 138440 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 12 / REWARD -1 / Q_MAX -5.949724e-01
TIMESTEP 138569 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 12 / REWARD 1 / Q_MAX 2.182121e+00
TIMESTEP 138698 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 13 / REWARD -1 / Q_MAX -2.418125e-01
TIMESTEP 138827 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 13 / REWARD 1 / Q_MAX 2.345693e+00
TIMESTEP 138956 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 14 / REWARD -1 / Q_MAX -2.935756e-02
TIMESTEP 139085 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 14 / REWARD 1 / Q_MAX 2.211395e+00
TIMESTEP 139214 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 15 / REWARD -1 / Q_MAX -5.432541e-01
TIMESTEP 

TIMESTEP 147564 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 10 / REWARD 1 / Q_MAX 2.022394e+00
TIMESTEP 147693 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 11 / REWARD -1 / Q_MAX -1.660985e-01
TIMESTEP 147823 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 11 / REWARD 1 / Q_MAX 2.177370e+00
TIMESTEP 147952 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 12 / REWARD -1 / Q_MAX -2.061456e-01
TIMESTEP 148084 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 12 / REWARD 1 / Q_MAX 1.540786e+00
TIMESTEP 148213 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 13 / REWARD -1 / Q_MAX -8.590952e-02
TIMESTEP 148344 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 13 / REWARD 1 / Q_MAX 1.826700e+00
TIMESTEP 148473 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 14 / REWARD -1 / Q_MAX -4.592610e-01
TIMESTEP 148602 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 14 / REWARD 1 / Q_MAX 2.283566e+00
TIME

TIMESTEP 156944 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 10 / REWARD -1 / Q_MAX -1.704369e-01
TIMESTEP 157075 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 10 / REWARD 1 / Q_MAX 1.951220e+00
TIMESTEP 157204 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 11 / REWARD -1 / Q_MAX -1.752638e-01
TIMESTEP 157336 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 11 / REWARD 1 / Q_MAX 1.807956e+00
TIMESTEP 157465 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 12 / REWARD -1 / Q_MAX -5.412883e-02
TIMESTEP 157596 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 12 / REWARD 1 / Q_MAX 1.703889e+00
TIMESTEP 157725 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 13 / REWARD -1 / Q_MAX -3.249026e-01
TIMESTEP 157856 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 13 / REWARD 1 / Q_MAX 1.611817e+00
TIMESTEP 157985 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 14 / REWARD -1 / Q_MAX -2.701165e-01


TIMESTEP 166004 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 13 / REWARD 1 / Q_MAX 1.758551e+00
TIMESTEP 166133 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 14 / REWARD -1 / Q_MAX -2.194952e-01
TIMESTEP 166262 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 14 / REWARD 1 / Q_MAX 1.717278e+00
TIMESTEP 166391 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 15 / REWARD -1 / Q_MAX -2.176818e-01
TIMESTEP 166520 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 15 / REWARD 1 / Q_MAX 1.587052e+00
TIMESTEP 166649 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 16 / REWARD -1 / Q_MAX -4.153998e-01
TIMESTEP 166778 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 16 / REWARD 1 / Q_MAX 1.631420e+00
TIMESTEP 166907 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 17 / REWARD -1 / Q_MAX -3.109641e-01
TIMESTEP 166953 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 18 / REWARD -1 / Q_MAX 5.150086e-01
T

TIMESTEP 175199 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 14 / REWARD -1 / Q_MAX -4.776692e-01
TIMESTEP 175328 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 14 / REWARD 1 / Q_MAX 1.495125e+00
TIMESTEP 175457 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 15 / REWARD -1 / Q_MAX -4.136456e-01
TIMESTEP 175588 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 15 / REWARD 1 / Q_MAX 1.404421e+00
TIMESTEP 175717 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 16 / REWARD -1 / Q_MAX -4.767886e-01
TIMESTEP 175846 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 16 / REWARD 1 / Q_MAX 1.438108e+00
TIMESTEP 175975 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 17 / REWARD -1 / Q_MAX -2.511385e-01
TIMESTEP 176104 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 17 / REWARD 1 / Q_MAX 1.523563e+00
TIMESTEP 176233 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 18 / REWARD -1 / Q_MAX -3.804303e-01


TIMESTEP 184397 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 15 / REWARD 1 / Q_MAX 1.309081e+00
TIMESTEP 184526 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 16 / REWARD -1 / Q_MAX -3.976595e-01
TIMESTEP 184572 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 17 / REWARD -1 / Q_MAX 2.561407e-01
TIMESTEP 184618 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 18 / REWARD -1 / Q_MAX -6.383675e-01
TIMESTEP 184749 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 18 / REWARD 1 / Q_MAX 1.354426e+00
TIMESTEP 184878 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 19 / REWARD -1 / Q_MAX -5.246499e-01
TIMESTEP 185007 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 19 / REWARD 1 / Q_MAX 1.136563e+00
TIMESTEP 185136 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -5.055902e-01
TIMESTEP 185182 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 7.383623e-01
TIME

TIMESTEP 192919 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -5.170790e-01
TIMESTEP 193050 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 1.299361e+00
TIMESTEP 193179 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX -5.511379e-01
TIMESTEP 193310 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 1.305584e+00
TIMESTEP 193439 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX -4.102058e-01
TIMESTEP 193568 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 2 / REWARD 1 / Q_MAX 1.039578e+00
TIMESTEP 193697 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 3 / REWARD -1 / Q_MAX -6.367906e-01
TIMESTEP 193826 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 3 / REWARD 1 / Q_MAX 1.401316e+00
TIMESTEP 193955 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 4 / REWARD -1 / Q_MAX -5.771393e-01
TIMESTEP 194001 / 

TIMESTEP 202114 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD 1 / Q_MAX 1.157727e+00
TIMESTEP 202243 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 2 / REWARD -1 / Q_MAX -3.898420e-01
TIMESTEP 202375 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD 1 / Q_MAX 9.098228e-01
TIMESTEP 202504 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 3 / REWARD -1 / Q_MAX -5.237016e-01
TIMESTEP 202633 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 3 / REWARD 1 / Q_MAX 1.269572e+00
TIMESTEP 202762 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 4 / REWARD -1 / Q_MAX -3.219929e-01
TIMESTEP 202893 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 4 / REWARD 1 / Q_MAX 1.358860e+00
TIMESTEP 203022 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 5 / REWARD -1 / Q_MAX -4.668556e-01
TIMESTEP 203151 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 5 / REWARD 1 / Q_MAX 1.273793e+00
TIMESTEP 203280 / EP

TIMESTEP 210984 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 4 / REWARD -1 / Q_MAX -4.696278e-01
TIMESTEP 211113 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 4 / REWARD 1 / Q_MAX 1.257606e+00
TIMESTEP 211242 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 5 / REWARD -1 / Q_MAX -4.439321e-01
TIMESTEP 211374 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 5 / REWARD 1 / Q_MAX 1.202808e+00
TIMESTEP 211503 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 6 / REWARD -1 / Q_MAX -4.492194e-01
TIMESTEP 211633 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 6 / REWARD 1 / Q_MAX 9.737567e-01
TIMESTEP 211762 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 7 / REWARD -1 / Q_MAX -4.634228e-01
TIMESTEP 211891 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 7 / REWARD 1 / Q_MAX 1.253962e+00
TIMESTEP 212020 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 8 / REWARD -1 / Q_MAX -5.923316e-01
TIMESTEP 212066 / 

TIMESTEP 219604 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 8 / REWARD -1 / Q_MAX 4.055458e-01
TIMESTEP 219650 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 9 / REWARD -1 / Q_MAX -5.193279e-01
TIMESTEP 219779 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 9 / REWARD 1 / Q_MAX 1.238379e+00
TIMESTEP 219908 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 10 / REWARD -1 / Q_MAX -4.523178e-01
TIMESTEP 219954 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 11 / REWARD -1 / Q_MAX 4.856122e-01
TIMESTEP 220000 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 12 / REWARD -1 / Q_MAX -9.677792e-01
TIMESTEP 220129 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 12 / REWARD 1 / Q_MAX 1.174846e+00
TIMESTEP 220258 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 13 / REWARD -1 / Q_MAX -5.368511e-01
TIMESTEP 220389 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 13 / REWARD 1 / Q_MAX 1.280946e+00
TIMESTEP 220

TIMESTEP 227813 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 17 / REWARD 1 / Q_MAX 1.215040e+00
TIMESTEP 227942 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 18 / REWARD -1 / Q_MAX -5.577334e-01
TIMESTEP 228073 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 18 / REWARD 1 / Q_MAX 1.116625e+00
TIMESTEP 228202 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 19 / REWARD -1 / Q_MAX -6.066701e-01
TIMESTEP 228333 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 19 / REWARD 1 / Q_MAX 1.033763e+00
TIMESTEP 228462 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -6.378675e-01
TIMESTEP 228591 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 1.234180e+00
TIMESTEP 228720 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX -4.975701e-01
TIMESTEP 228852 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 1.218715e+00
TIMESTEP 2

TIMESTEP 235179 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 9 / REWARD -1 / Q_MAX -8.327747e-01
TIMESTEP 235309 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 9 / REWARD 1 / Q_MAX 1.099647e+00
TIMESTEP 235438 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 10 / REWARD -1 / Q_MAX -5.789062e-01
TIMESTEP 235568 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 10 / REWARD 1 / Q_MAX 9.402788e-01
TIMESTEP 235697 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 11 / REWARD -1 / Q_MAX -5.314339e-01
TIMESTEP 235826 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 11 / REWARD 1 / Q_MAX 1.021464e+00
TIMESTEP 235955 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 12 / REWARD -1 / Q_MAX -6.229882e-01
TIMESTEP 236001 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 13 / REWARD -1 / Q_MAX 5.380455e-02
TIMESTEP 236047 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 14 / REWARD -1 / Q_MAX -8.888456e-01
TIMEST

TIMESTEP 243698 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 13 / REWARD 1 / Q_MAX 1.238898e+00
TIMESTEP 243827 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 14 / REWARD -1 / Q_MAX -3.598491e-01
TIMESTEP 243956 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 14 / REWARD 1 / Q_MAX 1.370501e+00
TIMESTEP 244085 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 15 / REWARD -1 / Q_MAX -3.581077e-01
TIMESTEP 244214 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 15 / REWARD 1 / Q_MAX 1.306046e+00
TIMESTEP 244343 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 16 / REWARD -1 / Q_MAX -4.834982e-01
TIMESTEP 244473 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 16 / REWARD 1 / Q_MAX 1.202827e+00
TIMESTEP 244602 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 17 / REWARD -1 / Q_MAX -4.448907e-01
TIMESTEP 244731 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 17 / REWARD 1 / Q_MAX 1.242503e+00
TI

TIMESTEP 252881 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 15 / REWARD -1 / Q_MAX -5.432258e-01
TIMESTEP 253010 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 15 / REWARD 1 / Q_MAX 1.267645e+00
TIMESTEP 253139 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 16 / REWARD -1 / Q_MAX -6.869252e-01
TIMESTEP 253268 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 16 / REWARD 1 / Q_MAX 1.060759e+00
TIMESTEP 253397 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 17 / REWARD -1 / Q_MAX -4.060922e-01
TIMESTEP 253526 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 17 / REWARD 1 / Q_MAX 1.236003e+00
TIMESTEP 253655 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 18 / REWARD -1 / Q_MAX -5.817467e-01
TIMESTEP 253784 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 18 / REWARD 1 / Q_MAX 1.289492e+00
TIMESTEP 253913 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 19 / REWARD -1 / Q_MAX -4.578144e-01
TIM

TIMESTEP 261893 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 16 / REWARD 1 / Q_MAX 1.262484e+00
TIMESTEP 262022 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 17 / REWARD -1 / Q_MAX -4.411938e-01
TIMESTEP 262068 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 18 / REWARD -1 / Q_MAX -1.198164e+00
TIMESTEP 262114 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 19 / REWARD -1 / Q_MAX -6.155227e-01
TIMESTEP 262243 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 19 / REWARD 1 / Q_MAX 1.147374e+00
TIMESTEP 262372 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -6.278055e-01
TIMESTEP 262418 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.035815e+00
TIMESTEP 262464 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -7.364851e-01
TIMESTEP 262510 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -1.056576e+00
TIME

TIMESTEP 270907 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 19 / REWARD -1 / Q_MAX -1.608170e-01
TIMESTEP 271036 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 19 / REWARD 1 / Q_MAX 1.238246e+00
TIMESTEP 271165 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -6.198910e-01
TIMESTEP 271211 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.178978e+00
TIMESTEP 271257 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -5.446128e-01
TIMESTEP 271386 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 2 / REWARD 1 / Q_MAX 1.317715e+00
TIMESTEP 271515 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 3 / REWARD -1 / Q_MAX -6.980161e-01
TIMESTEP 271644 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 3 / REWARD 1 / Q_MAX 1.274927e+00
TIMESTEP 271773 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 4 / REWARD -1 / Q_MAX -5.380843e-01
TIMESTEP 271

TIMESTEP 280253 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 18 / REWARD 1 / Q_MAX 1.276421e+00
TIMESTEP 280382 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 19 / REWARD -1 / Q_MAX -4.101084e-01
TIMESTEP 280511 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 19 / REWARD 1 / Q_MAX 1.187622e+00
TIMESTEP 280640 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -5.760316e-01
TIMESTEP 280769 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 1.306963e+00
TIMESTEP 280898 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX -5.221328e-01
TIMESTEP 281027 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 1.377210e+00
TIMESTEP 281156 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX -2.866669e-01
TIMESTEP 281285 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 2 / REWARD 1 / Q_MAX 1.301042e+00
TIMESTEP 28141

TIMESTEP 289928 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 17 / REWARD -1 / Q_MAX -4.275116e-01
TIMESTEP 290057 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 17 / REWARD 1 / Q_MAX 1.249125e+00
TIMESTEP 290186 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 18 / REWARD -1 / Q_MAX -4.418401e-01
TIMESTEP 290315 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 18 / REWARD 1 / Q_MAX 1.250028e+00
TIMESTEP 290444 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 19 / REWARD -1 / Q_MAX -4.691104e-01
TIMESTEP 290573 / EPSILON 0.04999999999999416 / bar1_score 19 / bar2_score 19 / REWARD 1 / Q_MAX 1.095411e+00
TIMESTEP 290702 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -5.807406e-01
TIMESTEP 290831 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 1.244659e+00
TIMESTEP 290960 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX -3.735424e-01
TIMEST

TIMESTEP 299607 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 15 / REWARD 1 / Q_MAX 1.192430e+00
TIMESTEP 299736 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 16 / REWARD -1 / Q_MAX -4.451922e-01
TIMESTEP 299865 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 16 / REWARD 1 / Q_MAX 1.164278e+00
TIMESTEP 299994 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 17 / REWARD -1 / Q_MAX -5.597330e-01
TIMESTEP 300123 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 17 / REWARD 1 / Q_MAX 1.225549e+00
TIMESTEP 300252 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 18 / REWARD -1 / Q_MAX -3.488045e-01
TIMESTEP 300383 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 18 / REWARD 1 / Q_MAX 1.235972e+00
TIMESTEP 300512 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 19 / REWARD -1 / Q_MAX -4.061023e-01
TIMESTEP 300641 / EPSILON 0.04999999999999416 / bar1_score 19 / bar2_score 19 / REWARD 1 / Q_MAX 1.104381e+00
TI

TIMESTEP 309121 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 14 / REWARD -1 / Q_MAX -5.523289e-01
TIMESTEP 309250 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 14 / REWARD 1 / Q_MAX 1.212450e+00
TIMESTEP 309379 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 15 / REWARD -1 / Q_MAX -4.543731e-01
TIMESTEP 309508 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 15 / REWARD 1 / Q_MAX 1.215544e+00
TIMESTEP 309637 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 16 / REWARD -1 / Q_MAX -4.656758e-01
TIMESTEP 309766 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 16 / REWARD 1 / Q_MAX 1.085585e+00
TIMESTEP 309895 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 17 / REWARD -1 / Q_MAX -4.366048e-01
TIMESTEP 310024 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 17 / REWARD 1 / Q_MAX 1.153832e+00
TIMESTEP 310153 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 18 / REWARD -1 / Q_MAX -4.876950e-01


TIMESTEP 318723 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 14 / REWARD -1 / Q_MAX -4.994666e-01
TIMESTEP 318852 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 14 / REWARD 1 / Q_MAX 1.087167e+00
TIMESTEP 318981 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 15 / REWARD -1 / Q_MAX -4.356822e-01
TIMESTEP 319110 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 15 / REWARD 1 / Q_MAX 1.249406e+00
TIMESTEP 319239 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 16 / REWARD -1 / Q_MAX -3.820424e-01
TIMESTEP 319368 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 16 / REWARD 1 / Q_MAX 1.167267e+00
TIMESTEP 319497 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 17 / REWARD -1 / Q_MAX -4.857614e-01
TIMESTEP 319626 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 17 / REWARD 1 / Q_MAX 1.123771e+00
TIMESTEP 319755 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 18 / REWARD -1 / Q_MAX -6.326273e-01


TIMESTEP 328490 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 14 / REWARD -1 / Q_MAX -4.276496e-01
TIMESTEP 328619 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 14 / REWARD 1 / Q_MAX 1.171405e+00
TIMESTEP 328748 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 15 / REWARD -1 / Q_MAX -4.896366e-01
TIMESTEP 328877 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 15 / REWARD 1 / Q_MAX 1.026929e+00
TIMESTEP 329006 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 16 / REWARD -1 / Q_MAX -5.316737e-01
TIMESTEP 329135 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 16 / REWARD 1 / Q_MAX 1.097627e+00
TIMESTEP 329264 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 17 / REWARD -1 / Q_MAX -4.572615e-01
TIMESTEP 329393 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 17 / REWARD 1 / Q_MAX 1.175856e+00
TIMESTEP 329522 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 18 / REWARD -1 / Q_MAX -4.695598e-01


TIMESTEP 338000 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 13 / REWARD 1 / Q_MAX 1.093314e+00
TIMESTEP 338129 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 14 / REWARD -1 / Q_MAX -3.752609e-01
TIMESTEP 338258 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 14 / REWARD 1 / Q_MAX 1.213150e+00
TIMESTEP 338387 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 15 / REWARD -1 / Q_MAX -4.044689e-01
TIMESTEP 338516 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 15 / REWARD 1 / Q_MAX 1.112308e+00
TIMESTEP 338645 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 16 / REWARD -1 / Q_MAX -5.956888e-01
TIMESTEP 338774 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 16 / REWARD 1 / Q_MAX 1.086398e+00
TIMESTEP 338903 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 17 / REWARD -1 / Q_MAX -5.495504e-01
TIMESTEP 339032 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 17 / REWARD 1 / Q_MAX 1.187719e+00
TI

TIMESTEP 347518 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 12 / REWARD -1 / Q_MAX -4.584716e-01
TIMESTEP 347647 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 12 / REWARD 1 / Q_MAX 1.023307e+00
TIMESTEP 347776 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 13 / REWARD -1 / Q_MAX -5.591217e-01
TIMESTEP 347905 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 13 / REWARD 1 / Q_MAX 1.154331e+00
TIMESTEP 348034 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 14 / REWARD -1 / Q_MAX -5.245178e-01
TIMESTEP 348163 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 14 / REWARD 1 / Q_MAX 1.045875e+00
TIMESTEP 348292 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 15 / REWARD -1 / Q_MAX -5.217637e-01
TIMESTEP 348421 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 15 / REWARD 1 / Q_MAX 1.122298e+00
TIMESTEP 348550 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 16 / REWARD -1 / Q_MAX -5.391083e-01


TIMESTEP 357123 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 14 / REWARD -1 / Q_MAX -5.434315e-01
TIMESTEP 357252 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 14 / REWARD 1 / Q_MAX 1.150200e+00
TIMESTEP 357381 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 15 / REWARD -1 / Q_MAX -4.734146e-01
TIMESTEP 357510 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 15 / REWARD 1 / Q_MAX 1.189010e+00
TIMESTEP 357639 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 16 / REWARD -1 / Q_MAX -4.750395e-01
TIMESTEP 357768 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 16 / REWARD 1 / Q_MAX 1.105116e+00
TIMESTEP 357897 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 17 / REWARD -1 / Q_MAX -5.505821e-01
TIMESTEP 358027 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 17 / REWARD 1 / Q_MAX 1.241590e+00
TIMESTEP 358408 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 17 / REWARD 1 / Q_MAX 9.734890e-01
TI

TIMESTEP 367230 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 12 / REWARD -1 / Q_MAX -4.307702e-01
TIMESTEP 367359 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 12 / REWARD 1 / Q_MAX 1.108888e+00
TIMESTEP 367657 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 13 / REWARD -1 / Q_MAX 2.497195e-01
TIMESTEP 367703 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 14 / REWARD -1 / Q_MAX -6.777745e-01
TIMESTEP 367832 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 14 / REWARD 1 / Q_MAX 1.200807e+00
TIMESTEP 367961 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 15 / REWARD -1 / Q_MAX -4.890988e-01
TIMESTEP 368090 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 15 / REWARD 1 / Q_MAX 1.181663e+00
TIMESTEP 368219 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 16 / REWARD -1 / Q_MAX -4.517058e-01
TIMESTEP 368348 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 16 / REWARD 1 / Q_MAX 1.297533e+00
T

TIMESTEP 378776 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 10 / REWARD 1 / Q_MAX 1.127580e+00
TIMESTEP 378905 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 11 / REWARD -1 / Q_MAX 5.372966e-02
TIMESTEP 379034 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 11 / REWARD 1 / Q_MAX 1.336961e+00
TIMESTEP 379415 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 11 / REWARD 1 / Q_MAX 1.070607e+00
TIMESTEP 379544 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 12 / REWARD -1 / Q_MAX -3.239491e-01
TIMESTEP 379673 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 12 / REWARD 1 / Q_MAX 1.295295e+00
TIMESTEP 380055 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 12 / REWARD 1 / Q_MAX 9.660859e-01
TIMESTEP 380184 / EPSILON 0.04999999999999416 / bar1_score 16 / bar2_score 13 / REWARD -1 / Q_MAX -3.717086e-01
TIMESTEP 380313 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 13 / REWARD 1 / Q_MAX 1.247915e+00
TIMES

TIMESTEP 391254 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 16 / REWARD 1 / Q_MAX 1.223326e+00
TIMESTEP 391383 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 17 / REWARD -1 / Q_MAX -2.128990e-01
TIMESTEP 391429 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 18 / REWARD -1 / Q_MAX -5.406662e-01
TIMESTEP 391475 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 19 / REWARD -1 / Q_MAX -4.267087e-01
TIMESTEP 391604 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 19 / REWARD 1 / Q_MAX 1.360126e+00
TIMESTEP 391733 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -6.895894e-02
TIMESTEP 391864 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 1.381773e+00
TIMESTEP 391993 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX -5.113354e-01
TIMESTEP 392122 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 1.227352e+00
TIMESTEP

TIMESTEP 402882 / EPSILON 0.04999999999999416 / bar1_score 19 / bar2_score 17 / REWARD 1 / Q_MAX 1.492298e+00
TIMESTEP 403011 / EPSILON 0.04999999999999416 / bar1_score 19 / bar2_score 18 / REWARD -1 / Q_MAX -1.334865e-01
TIMESTEP 403141 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD 1 / Q_MAX 1.452258e+00
TIMESTEP 403270 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.695690e-01
TIMESTEP 403399 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD 1 / Q_MAX 1.319021e+00
TIMESTEP 403528 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 2 / REWARD -1 / Q_MAX -3.087386e-01
TIMESTEP 403658 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD 1 / Q_MAX 1.350498e+00
TIMESTEP 403787 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 3 / REWARD -1 / Q_MAX -3.463544e-01
TIMESTEP 403916 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 3 / REWARD 1 / Q_MAX 1.275304e+00
TIMESTEP 404045 

TIMESTEP 413746 / EPSILON 0.04999999999999416 / bar1_score 17 / bar2_score 18 / REWARD -1 / Q_MAX -4.769392e-01
TIMESTEP 413875 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 18 / REWARD 1 / Q_MAX 1.387835e+00
TIMESTEP 414004 / EPSILON 0.04999999999999416 / bar1_score 18 / bar2_score 19 / REWARD -1 / Q_MAX -1.468366e-01
TIMESTEP 414133 / EPSILON 0.04999999999999416 / bar1_score 19 / bar2_score 19 / REWARD 1 / Q_MAX 1.355433e+00
TIMESTEP 414516 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 0 / REWARD 1 / Q_MAX 1.307102e+00
TIMESTEP 414645 / EPSILON 0.04999999999999416 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.703489e-01
TIMESTEP 414774 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 1 / REWARD 1 / Q_MAX 1.366828e+00
TIMESTEP 414903 / EPSILON 0.04999999999999416 / bar1_score 1 / bar2_score 2 / REWARD -1 / Q_MAX -3.669227e-01
TIMESTEP 415032 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD 1 / Q_MAX 1.274118e+00
TIMESTEP 415

TIMESTEP 427305 / EPSILON 0.04999999999999416 / bar1_score 2 / bar2_score 2 / REWARD 1 / Q_MAX 1.367661e+00
TIMESTEP 427686 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 2 / REWARD 1 / Q_MAX 1.669221e+00
TIMESTEP 427815 / EPSILON 0.04999999999999416 / bar1_score 3 / bar2_score 3 / REWARD -1 / Q_MAX -2.846228e-01
TIMESTEP 427945 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 3 / REWARD 1 / Q_MAX 1.316875e+00
TIMESTEP 428074 / EPSILON 0.04999999999999416 / bar1_score 4 / bar2_score 4 / REWARD -1 / Q_MAX -2.656498e-01
TIMESTEP 428203 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 4 / REWARD 1 / Q_MAX 1.317014e+00
TIMESTEP 428332 / EPSILON 0.04999999999999416 / bar1_score 5 / bar2_score 5 / REWARD -1 / Q_MAX -1.678786e-01
TIMESTEP 428461 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 5 / REWARD 1 / Q_MAX 1.313555e+00
TIMESTEP 428590 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 6 / REWARD -1 / Q_MAX -2.626106e-01
TIMESTEP 428719 / EP

TIMESTEP 441359 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 8 / REWARD -1 / Q_MAX -3.456656e-01
TIMESTEP 441488 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 8 / REWARD 1 / Q_MAX 1.199275e+00
TIMESTEP 441870 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 8 / REWARD 1 / Q_MAX 1.381623e+00
TIMESTEP 441999 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 9 / REWARD -1 / Q_MAX -2.972991e-01
TIMESTEP 442128 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 9 / REWARD 1 / Q_MAX 1.377016e+00
TIMESTEP 442509 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 9 / REWARD 1 / Q_MAX 1.469389e+00
TIMESTEP 442890 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 9 / REWARD 1 / Q_MAX 1.441695e+00
TIMESTEP 443019 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 10 / REWARD -1 / Q_MAX -4.572312e-01
TIMESTEP 443148 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 10 / REWARD 1 / Q_MAX 1.213898e+00
TIMESTEP 443529

TIMESTEP 463467 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 1 / REWARD -1 / Q_MAX -3.945559e-01
TIMESTEP 463596 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 1 / REWARD 1 / Q_MAX 1.242186e+00
TIMESTEP 463977 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 1 / REWARD 1 / Q_MAX 1.378326e+00
TIMESTEP 464359 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 1 / REWARD 1 / Q_MAX 1.549195e+00
TIMESTEP 464740 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 1 / REWARD 1 / Q_MAX 1.492506e+00
TIMESTEP 465122 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 1 / REWARD 1 / Q_MAX 1.538392e+00
TIMESTEP 465503 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 1 / REWARD 1 / Q_MAX 1.319966e+00
TIMESTEP 465884 / EPSILON 0.04999999999999416 / bar1_score 14 / bar2_score 1 / REWARD 1 / Q_MAX 1.480013e+00
TIMESTEP 466265 / EPSILON 0.04999999999999416 / bar1_score 15 / bar2_score 1 / REWARD 1 / Q_MAX 1.389971e+00
TIMESTEP 466646 / EP

TIMESTEP 487442 / EPSILON 0.04999999999999416 / bar1_score 6 / bar2_score 7 / REWARD -1 / Q_MAX 7.447886e-01
TIMESTEP 487571 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 7 / REWARD 1 / Q_MAX 1.425198e+00
TIMESTEP 487700 / EPSILON 0.04999999999999416 / bar1_score 7 / bar2_score 8 / REWARD -1 / Q_MAX -5.214808e-01
TIMESTEP 487832 / EPSILON 0.04999999999999416 / bar1_score 8 / bar2_score 8 / REWARD 1 / Q_MAX 1.069655e+00
TIMESTEP 488215 / EPSILON 0.04999999999999416 / bar1_score 9 / bar2_score 8 / REWARD 1 / Q_MAX 1.503901e+00
TIMESTEP 488596 / EPSILON 0.04999999999999416 / bar1_score 10 / bar2_score 8 / REWARD 1 / Q_MAX 1.476559e+00
TIMESTEP 488978 / EPSILON 0.04999999999999416 / bar1_score 11 / bar2_score 8 / REWARD 1 / Q_MAX 1.366789e+00
TIMESTEP 489359 / EPSILON 0.04999999999999416 / bar1_score 12 / bar2_score 8 / REWARD 1 / Q_MAX 1.335527e+00
TIMESTEP 489742 / EPSILON 0.04999999999999416 / bar1_score 13 / bar2_score 8 / REWARD 1 / Q_MAX 1.493922e+00
TIMESTEP 490123 / EPS

In [6]:
32265/60

537.75

In [3]:
print("Game_Ends_in Time:",int(time.time() - tick))