# Capsule Network for RL

In [1]:
import tensorflow as tf
import cv2
import sys
import pong_fun as game # whichever is imported "as game" will be used
import random
import time 
import numpy as np
from collections import deque

pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
epsilon = 1e-9  # small constant added under the square root in squash() so the division never hits zero
iter_routing = 2  # number of routing iterations performed inside routing()
train_freq = 10  # after the observe phase, run one gradient step every `train_freq` timesteps

In [3]:
ACTIONS = 6 # number of valid actions (width of the Q-value output and of the one-hot action vector)
GAMMA = 0.99 # discount factor applied to the next state's max Q-value in the training target
OBSERVE = 1000. # timesteps of purely random play before training and epsilon annealing begin
EXPLORE = 5000. # number of annealing steps taken to move epsilon from INITIAL_EPSILON to FINAL_EPSILON
FINAL_EPSILON = 0.05 # final value of epsilon
INITIAL_EPSILON = 1.0 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch

# UPDATED CAPSULE NETWORK

In [4]:
def squash(vector):
    """Apply the capsule "squash" non-linearity.

    The capsule components are taken to lie along axis -2 of `vector`. Each
    capsule keeps its direction while its length ||v|| is mapped to
    ||v||^2 / (1 + ||v||^2), so short vectors shrink towards zero and long
    vectors approach unit length.

    Args:
        vector: tensor whose axis -2 holds the components of each capsule.

    Returns:
        A tensor of the same shape as `vector` holding the squashed capsules.
    """
    # Squared length per capsule; the reduced axis is kept so it broadcasts.
    squared_norm = reduce_sum(tf.square(vector), -2, keepdims=True)
    # Target length ||v||^2 / (1 + ||v||^2) ...
    shrink = squared_norm / (1 + squared_norm)
    # ... divided by the current length (epsilon guards the all-zero capsule).
    scale = shrink / tf.sqrt(squared_norm + epsilon)
    return scale * vector
def routing(input, b_IJ):
    """Dynamic routing from 1024 primary capsules to 10 output capsules.

    Args:
        input: primary-capsule tensor. It is tiled 160 times along axis 2 and
            multiplied with a weight of shape (1, 1024, 160, 8, 1), so it is
            expected to be [batch_size, 1024, 1, 8, 1].
        b_IJ: initial routing logits, [batch_size, 1024, 10, 1, 1].

    Returns:
        v_J, the squashed output capsules, [batch_size, 1, 10, 16, 1].
    """
    # Transformation weights: [1, num_caps_i, num_caps_j * len_v_j, len_u_i, 1]
    W = tf.get_variable('Weight', shape=(1, 1024, 160, 8, 1), dtype=tf.float32,
                        initializer=tf.random_normal_initializer(stddev=0.01))
    biases = tf.get_variable('bias', shape=(1, 1, 10, 16, 1))

    # The per-capsule matmul is expressed as broadcast multiply + reduce_sum:
    # tile the input to [batch_size, 1024, 160, 8, 1], multiply by W, sum the
    # length-8 axis, then split 160 into (10 capsules, 16 components).
    tiled_input = tf.tile(input, [1, 1, 160, 1, 1])
    u_hat = tf.reshape(reduce_sum(W * tiled_input, axis=3, keepdims=True),
                       shape=[-1, 1024, 10, 16, 1])

    # Same values as u_hat in the forward pass, but blocks back-propagation.
    u_hat_stopped = tf.stop_gradient(u_hat, name='stop_gradient')

    final_iter = iter_routing - 1
    for r_iter in range(iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # Coupling coefficients over the 10 output capsules:
            # [batch_size, 1024, 10, 1, 1]
            c_IJ = softmax(b_IJ, axis=2)

            # Only the final iteration uses the differentiable predictions so
            # that gradients reach W; earlier iterations use the stopped copy.
            is_final = r_iter == final_iter
            predictions = u_hat if is_final else u_hat_stopped

            # Weighted sum over the 1024 input capsules, then squash:
            # [batch_size, 1, 10, 16, 1]
            s_J = reduce_sum(tf.multiply(c_IJ, predictions), axis=1, keepdims=True) + biases
            v_J = squash(s_J)

            if not is_final:
                # Agreement between each prediction and the current output:
                # dot product over the 16 components ->
                # [batch_size, 1024, 10, 1, 1], added onto the logits.
                v_J_tiled = tf.tile(v_J, [1, 1024, 1, 1, 1])
                agreement = reduce_sum(u_hat_stopped * v_J_tiled, axis=3, keepdims=True)
                b_IJ += agreement
    return v_J
# For version compatibility: older TensorFlow 1.x releases name the flag
# `keep_dims`, newer ones name it `keepdims`.
def reduce_sum(input_tensor, axis=None, keepdims=False):
    """Sum `input_tensor` over `axis` regardless of the keep-dims spelling.

    Args:
        input_tensor: tensor to reduce.
        axis: axis (or axes) to sum over; None sums over every axis.
        keepdims: when True, the reduced axes are kept with length 1.

    Returns:
        The reduced tensor returned by tf.reduce_sum.

    Raises:
        TypeError: any TypeError from tf.reduce_sum that is not caused by the
            `keepdims` keyword being unknown is re-raised unchanged.
    """
    try:
        return tf.reduce_sum(input_tensor, axis=axis, keepdims=keepdims)
    except TypeError as err:
        # Fall back only when this TensorFlow does not know `keepdims`;
        # genuine argument errors must not be hidden by the retry.
        if "unexpected keyword argument" not in str(err):
            raise
        return tf.reduce_sum(input_tensor, axis=axis, keep_dims=keepdims)
# For version compatibility: older TensorFlow 1.x releases name the argument
# `dim`, newer ones name it `axis`.
def softmax(logits, axis=None):
    """Softmax over `axis` regardless of how TensorFlow spells that argument.

    Args:
        logits: tensor of unnormalised scores.
        axis: axis to normalise over; None lets TensorFlow use its default
            (the last axis).

    Returns:
        The tensor returned by tf.nn.softmax.

    Raises:
        TypeError: any TypeError from tf.nn.softmax that is not caused by the
            `axis` keyword being unknown is re-raised unchanged.
    """
    try:
        return tf.nn.softmax(logits, axis=axis)
    except TypeError as err:
        # Fall back only when this TensorFlow does not know `axis`.
        if "unexpected keyword argument" not in str(err):
            raise
        # The legacy signature expects an integer; -1 is its documented default.
        return tf.nn.softmax(logits, dim=-1 if axis is None else axis)

In [None]:
def createNetwork():
    """Build the capsule-network Q-function graph.

    Pipeline: 8x8/4 convolution -> primary capsules (6x6/2 convolution
    reshaped to 1024 capsules of length 8) -> routing to 10 capsules of
    length 16 -> linear layer producing one Q-value per action.

    Returns:
        A tuple (s, coeff, readout):
        s: float placeholder for the stacked frames, [None, 84, 84, 4].
        coeff: float32 placeholder for the initial routing logits,
            [None, 1024, 10, 1, 1].
        readout: Q-value tensor, [None, ACTIONS].
    """
    # Graph inputs.
    s = tf.placeholder("float", [None, 84, 84, 4])
    coeff = tf.placeholder(tf.float32, shape=(None, 1024, 10, 1, 1))

    # First convolution layer -> [batch_size, 20, 20, 64].
    kernel_init = tf.random_normal_initializer(0., 0.01)
    bias_init = tf.constant_initializer(0.01)
    conv_kernel = tf.get_variable('w1', [8, 8, 4, 64], initializer=kernel_init)
    conv_bias = tf.get_variable('b1', [64], initializer=bias_init)
    pre_activation = tf.nn.conv2d(s, conv_kernel, strides=[1, 4, 4, 1], padding="VALID")
    conv1 = tf.nn.relu(tf.nn.bias_add(pre_activation, conv_bias))
    conv1 = tf.reshape(conv1, [-1, 20, 20, 64])

    # Primary capsules: 16 * 8 feature maps regrouped into length-8 vectors.
    primary = tf.contrib.layers.conv2d(
        conv1, 16 * 8, kernel_size=6, stride=2, padding="VALID",
        activation_fn=tf.nn.relu,
        weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
        biases_initializer=tf.constant_initializer(0))
    # Reshape to (batch_size, 1024, 8, 1), then squash each capsule.
    primary = squash(tf.reshape(primary, (-1, 1024, 8, 1)))

    # Insert the axis that routing() tiles across the output capsules.
    capsule_len = primary.shape[-2].value
    routed = routing(tf.reshape(primary, shape=(-1, 1024, 1, capsule_len, 1)), coeff)

    # Flatten the 10 x 16 output capsules and map them to Q-values.
    vector_j = tf.reshape(routed, shape=(-1, 160))
    print(vector_j)
    readout = tf.contrib.layers.fully_connected(vector_j, num_outputs=ACTIONS, activation_fn=None)
    print(readout)
    return s, coeff, readout

In [None]:
def _preprocess_frame(frame, size=84):
    """Resize a raw game frame to size x size, grayscale it and binarise it."""
    gray = cv2.cvtColor(cv2.resize(frame, (size, size)), cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    return binary


def trainNetwork(s, coeff, readout, sess):
    """Play the game with an epsilon-greedy policy and train the Q-network.

    Transitions are stored in a replay buffer of at most REPLAY_MEMORY
    entries. Once more than OBSERVE timesteps have elapsed, a minibatch of
    BATCH transitions is sampled every `train_freq` timesteps and one Adam
    step is taken on the squared TD error.

    Args:
        s: state placeholder, [None, 84, 84, 4].
        coeff: routing-logit placeholder, [None, 1024, 10, 1, 1].
        readout: Q-value tensor, [None, ACTIONS].
        sess: TensorFlow session used to initialise variables and to run
            every evaluation and training step.

    Returns:
        None. The loop ends once bar1_score - bar2_score exceeds 18.
    """
    tick = time.time()

    # Cost: squared difference between the target y and Q(s, a) of the action
    # that was actually taken (selected through the one-hot vector a).
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), axis=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cost)

    # Open up a game state to communicate with the emulator.
    game_state = game.GameState()

    # Replay memory; maxlen discards the oldest transition automatically.
    D = deque(maxlen=REPLAY_MEMORY)

    # First state: take action 0 once and stack the 84x84 frame four times.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, _, _, _, _ = game_state.frame_step(do_nothing)
    x_t = _preprocess_frame(x_t)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # Initialise after the optimizer exists so its slot variables are covered.
    sess.run(tf.global_variables_initializer())

    # Zero routing logits for single-state and minibatch evaluations.
    b_IJ1 = np.zeros((1, 1024, 10, 1, 1), dtype=np.float32)
    b_IJ2 = np.zeros((BATCH, 1024, 10, 1, 1), dtype=np.float32)

    # Exploration rate; named to avoid confusion with the module-level
    # numerical `epsilon` used by squash().
    explore_eps = INITIAL_EPSILON
    eps_step = (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
    t = 0
    episode = 0
    while True:
        # Choose an action epsilon-greedily (always random while observing).
        readout_t = sess.run(readout, feed_dict={s: s_t.reshape((1, 84, 84, 4)), coeff: b_IJ1})
        if random.random() <= explore_eps or t <= OBSERVE:
            action_index = random.randrange(ACTIONS)
        else:
            action_index = np.argmax(readout_t)
        a_t = np.zeros([ACTIONS])
        a_t[action_index] = 1

        # Anneal epsilon, never stepping below FINAL_EPSILON.
        if explore_eps > FINAL_EPSILON and t > OBSERVE:
            explore_eps = max(FINAL_EPSILON, explore_eps - eps_step)

        # Run the selected action and observe the next frame and reward.
        x_t1_col, r_t, terminal, bar1_score, bar2_score = game_state.frame_step(a_t)
        if terminal == 1:
            episode += 1
        x_t1 = _preprocess_frame(x_t1_col)[:, :, np.newaxis]
        # Newest frame goes first; the oldest of the four is dropped.
        s_t1 = np.append(x_t1, s_t[:, :, 0:3], axis=2)

        # Store the transition in replay memory.
        D.append((s_t, a_t, r_t, s_t1, terminal))

        # Only train once observing is done and a full minibatch is available.
        if t > OBSERVE and t % train_freq == 0 and len(D) >= BATCH:
            minibatch = random.sample(D, BATCH)

            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            readout_j1_batch = sess.run(readout, feed_dict={s: s_j1_batch, coeff: b_IJ2})
            # TD target: the bare reward on terminal transitions, otherwise
            # reward plus the discounted best Q-value of the next state.
            y_batch = [
                reward if is_terminal else reward + GAMMA * np.max(q_next)
                for (_, _, reward, _, is_terminal), q_next in zip(minibatch, readout_j1_batch)
            ]

            # Perform one gradient step.
            sess.run(train_step, feed_dict={
                y: y_batch,
                a: a_batch,
                s: s_j_batch,
                coeff: b_IJ2})

        # Advance to the next state.
        s_t = s_t1
        t += 1

        # Log only the timesteps on which a point was scored.
        if r_t != 0:
            print("TIMESTEP", t, "/ e", episode, "/ bar1_score", bar1_score, "/ bar2_score", bar2_score, "/ REWARD", r_t, "/ Q_MAX %e" % np.max(readout_t))

        # Stop once bar1 leads bar2 by more than 18 points.
        if (bar1_score - bar2_score) > 18:
            print("Game_Ends_in Time:", int(time.time() - tick))
            break

In [None]:
def playGame():
    """Build a fresh graph, train the network in it and release the session.

    The default graph is reset first so the function can be re-run in the
    same kernel without variable-name clashes. The InteractiveSession is
    closed even when training raises or is interrupted, so repeated runs do
    not accumulate open sessions.
    """
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    try:
        s, coeff, readout = createNetwork()
        trainNetwork(s, coeff, readout, sess)
    finally:
        # Frees the session's resources and removes it as the default session.
        sess.close()

In [None]:
def main():
    """Entry point: delegates to playGame()."""
    playGame()

if __name__ == "__main__":
    # Wall-clock timing of the whole run; the timer starts before main() is called.
    tick = time.time()
    main()
    # Elapsed time is truncated to whole seconds for display.
    print("Game_Ends_in Time:",int(time.time() - tick))
    print("____________ END HERE _____________")

Tensor("Reshape_4:0", shape=(?, 160), dtype=float32)
Tensor("fully_connected/BiasAdd:0", shape=(?, 6), dtype=float32)
TIMESTEP 129 / e 0 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 7.505484e-02
TIMESTEP 175 / e 0 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 8.441585e-02
TIMESTEP 221 / e 0 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 7.697585e-02
TIMESTEP 267 / e 0 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 8.209936e-02
TIMESTEP 313 / e 0 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 7.899049e-02
TIMESTEP 359 / e 0 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 8.756793e-02
TIMESTEP 405 / e 0 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 8.355741e-02
TIMESTEP 451 / e 0 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 8.483987e-02
TIMESTEP 497 / e 0 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX 8.355837e-02
TIMESTEP 543 / e 0 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX 8.666156e-02
TIMESTEP 589 / e 0 / bar1_score 0 / bar2_score 11 / REWARD -1

TIMESTEP 5811 / e 4 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX 2.022390e+00
TIMESTEP 5857 / e 4 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX 2.043667e+00
TIMESTEP 5903 / e 4 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX 2.049613e+00
TIMESTEP 5949 / e 4 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX 2.003599e+00
TIMESTEP 5995 / e 4 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX 2.038406e+00
TIMESTEP 6041 / e 5 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 2.017385e+00
TIMESTEP 6087 / e 5 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 2.041587e+00
TIMESTEP 6133 / e 5 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 2.012958e+00
TIMESTEP 6179 / e 5 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 2.101731e+00
TIMESTEP 6225 / e 5 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 2.032404e+00
TIMESTEP 6271 / e 5 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 2.083940e+00
TIMESTEP 6487 / e 5 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 2.037361e+00

TIMESTEP 12828 / e 9 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 2.058839e+00
TIMESTEP 12874 / e 9 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 2.115083e+00
TIMESTEP 12920 / e 9 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 2.101838e+00
TIMESTEP 12966 / e 9 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX 2.097155e+00
TIMESTEP 13264 / e 9 / bar1_score 1 / bar2_score 9 / REWARD 1 / Q_MAX 2.178133e+00
TIMESTEP 13393 / e 9 / bar1_score 1 / bar2_score 10 / REWARD -1 / Q_MAX 2.257582e+00
TIMESTEP 13439 / e 9 / bar1_score 1 / bar2_score 11 / REWARD -1 / Q_MAX 2.282442e+00
TIMESTEP 13485 / e 9 / bar1_score 1 / bar2_score 12 / REWARD -1 / Q_MAX 2.220992e+00
TIMESTEP 13531 / e 9 / bar1_score 1 / bar2_score 13 / REWARD -1 / Q_MAX 2.277754e+00
TIMESTEP 13577 / e 9 / bar1_score 1 / bar2_score 14 / REWARD -1 / Q_MAX 2.214921e+00
TIMESTEP 13623 / e 9 / bar1_score 1 / bar2_score 15 / REWARD -1 / Q_MAX 2.229576e+00
TIMESTEP 13669 / e 9 / bar1_score 1 / bar2_score 16 / REWARD -1 / Q_MAX

TIMESTEP 18795 / e 13 / bar1_score 1 / bar2_score 17 / REWARD -1 / Q_MAX 1.068085e+00
TIMESTEP 18841 / e 13 / bar1_score 1 / bar2_score 18 / REWARD -1 / Q_MAX 1.245030e+00
TIMESTEP 18887 / e 13 / bar1_score 1 / bar2_score 19 / REWARD -1 / Q_MAX 1.176573e+00
TIMESTEP 18933 / e 14 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 1.238583e+00
TIMESTEP 18979 / e 14 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 1.600637e+00
TIMESTEP 19025 / e 14 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 1.061883e+00
TIMESTEP 19071 / e 14 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 1.291385e+00
TIMESTEP 19117 / e 14 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 1.318389e+00
TIMESTEP 19163 / e 14 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 1.358538e+00
TIMESTEP 19209 / e 14 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 1.196287e+00
TIMESTEP 19255 / e 14 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 1.362226e+00
TIMESTEP 19301 / e 14 / bar1_score 0 / bar2_score 8 / REWARD -

TIMESTEP 24883 / e 18 / bar1_score 1 / bar2_score 6 / REWARD -1 / Q_MAX 1.698384e+00
TIMESTEP 24929 / e 18 / bar1_score 1 / bar2_score 7 / REWARD -1 / Q_MAX 1.281171e+00
TIMESTEP 25142 / e 18 / bar1_score 1 / bar2_score 8 / REWARD -1 / Q_MAX 1.773054e+00
TIMESTEP 25188 / e 18 / bar1_score 1 / bar2_score 9 / REWARD -1 / Q_MAX 1.367824e+00
TIMESTEP 25234 / e 18 / bar1_score 1 / bar2_score 10 / REWARD -1 / Q_MAX 1.063390e+00
TIMESTEP 25363 / e 18 / bar1_score 2 / bar2_score 10 / REWARD 1 / Q_MAX 1.846761e+00
TIMESTEP 25492 / e 18 / bar1_score 2 / bar2_score 11 / REWARD -1 / Q_MAX 1.145269e+00
TIMESTEP 25538 / e 18 / bar1_score 2 / bar2_score 12 / REWARD -1 / Q_MAX 9.536700e-01
TIMESTEP 25751 / e 18 / bar1_score 2 / bar2_score 13 / REWARD -1 / Q_MAX 1.366613e+00
TIMESTEP 25797 / e 18 / bar1_score 2 / bar2_score 14 / REWARD -1 / Q_MAX 1.230184e+00
TIMESTEP 26010 / e 18 / bar1_score 2 / bar2_score 15 / REWARD -1 / Q_MAX 1.540674e+00
TIMESTEP 26056 / e 18 / bar1_score 2 / bar2_score 16 / REWA

TIMESTEP 32388 / e 22 / bar1_score 2 / bar2_score 12 / REWARD 1 / Q_MAX 1.932792e+00
TIMESTEP 32517 / e 22 / bar1_score 2 / bar2_score 13 / REWARD -1 / Q_MAX 1.023293e+00
TIMESTEP 32563 / e 22 / bar1_score 2 / bar2_score 14 / REWARD -1 / Q_MAX 6.743419e-01
TIMESTEP 32609 / e 22 / bar1_score 2 / bar2_score 15 / REWARD -1 / Q_MAX 8.063160e-01
TIMESTEP 32655 / e 22 / bar1_score 2 / bar2_score 16 / REWARD -1 / Q_MAX 4.517286e-01
TIMESTEP 32701 / e 22 / bar1_score 2 / bar2_score 17 / REWARD -1 / Q_MAX 7.441127e-01
TIMESTEP 32830 / e 22 / bar1_score 3 / bar2_score 17 / REWARD 1 / Q_MAX 2.040540e+00
TIMESTEP 32959 / e 22 / bar1_score 3 / bar2_score 18 / REWARD -1 / Q_MAX 1.031806e+00
TIMESTEP 33005 / e 22 / bar1_score 3 / bar2_score 19 / REWARD -1 / Q_MAX 6.052704e-01
TIMESTEP 33051 / e 23 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 5.114159e-01
TIMESTEP 33097 / e 23 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 3.455999e-01
TIMESTEP 33143 / e 23 / bar1_score 0 / bar2_score 2 / REWA

TIMESTEP 40881 / e 26 / bar1_score 1 / bar2_score 11 / REWARD -1 / Q_MAX 8.871042e-01
TIMESTEP 41094 / e 26 / bar1_score 1 / bar2_score 12 / REWARD -1 / Q_MAX 1.072945e+00
TIMESTEP 41140 / e 26 / bar1_score 1 / bar2_score 13 / REWARD -1 / Q_MAX 8.462496e-01
TIMESTEP 41186 / e 26 / bar1_score 1 / bar2_score 14 / REWARD -1 / Q_MAX 8.613355e-01
TIMESTEP 41315 / e 26 / bar1_score 2 / bar2_score 14 / REWARD 1 / Q_MAX 2.595545e+00
TIMESTEP 41444 / e 26 / bar1_score 2 / bar2_score 15 / REWARD -1 / Q_MAX 6.973336e-01
TIMESTEP 41490 / e 26 / bar1_score 2 / bar2_score 16 / REWARD -1 / Q_MAX 7.275587e-01
TIMESTEP 41536 / e 26 / bar1_score 2 / bar2_score 17 / REWARD -1 / Q_MAX 8.009608e-01
TIMESTEP 41582 / e 26 / bar1_score 2 / bar2_score 18 / REWARD -1 / Q_MAX 5.997099e-01
TIMESTEP 41628 / e 26 / bar1_score 2 / bar2_score 19 / REWARD -1 / Q_MAX 7.908754e-01
TIMESTEP 41674 / e 27 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 4.341797e-01
TIMESTEP 41720 / e 27 / bar1_score 0 / bar2_score 1 / RE

TIMESTEP 49347 / e 30 / bar1_score 5 / bar2_score 9 / REWARD -1 / Q_MAX 5.300556e-01
TIMESTEP 49478 / e 30 / bar1_score 6 / bar2_score 9 / REWARD 1 / Q_MAX 2.821415e+00
TIMESTEP 49607 / e 30 / bar1_score 6 / bar2_score 10 / REWARD -1 / Q_MAX 6.803880e-01
TIMESTEP 49736 / e 30 / bar1_score 7 / bar2_score 10 / REWARD 1 / Q_MAX 2.893398e+00
TIMESTEP 49865 / e 30 / bar1_score 7 / bar2_score 11 / REWARD -1 / Q_MAX 9.786171e-01
TIMESTEP 49996 / e 30 / bar1_score 8 / bar2_score 11 / REWARD 1 / Q_MAX 2.803344e+00
TIMESTEP 50125 / e 30 / bar1_score 8 / bar2_score 12 / REWARD -1 / Q_MAX 9.395697e-01
TIMESTEP 50171 / e 30 / bar1_score 8 / bar2_score 13 / REWARD -1 / Q_MAX 7.169942e-01
TIMESTEP 50217 / e 30 / bar1_score 8 / bar2_score 14 / REWARD -1 / Q_MAX 8.992683e-01
TIMESTEP 50346 / e 30 / bar1_score 9 / bar2_score 14 / REWARD 1 / Q_MAX 2.942011e+00
TIMESTEP 50475 / e 30 / bar1_score 9 / bar2_score 15 / REWARD -1 / Q_MAX 1.436363e+00
TIMESTEP 50521 / e 30 / bar1_score 9 / bar2_score 16 / REWAR

TIMESTEP 60842 / e 33 / bar1_score 5 / bar2_score 4 / REWARD 1 / Q_MAX 3.131935e+00
TIMESTEP 60971 / e 33 / bar1_score 5 / bar2_score 5 / REWARD -1 / Q_MAX 7.440069e-01
TIMESTEP 61100 / e 33 / bar1_score 6 / bar2_score 5 / REWARD 1 / Q_MAX 3.259757e+00
TIMESTEP 61229 / e 33 / bar1_score 6 / bar2_score 6 / REWARD -1 / Q_MAX 7.565650e-01
TIMESTEP 61361 / e 33 / bar1_score 7 / bar2_score 6 / REWARD 1 / Q_MAX 3.112978e+00
TIMESTEP 61490 / e 33 / bar1_score 7 / bar2_score 7 / REWARD -1 / Q_MAX 9.544862e-01
TIMESTEP 61536 / e 33 / bar1_score 7 / bar2_score 8 / REWARD -1 / Q_MAX 1.022989e+00
TIMESTEP 61582 / e 33 / bar1_score 7 / bar2_score 9 / REWARD -1 / Q_MAX 7.950318e-01
TIMESTEP 61711 / e 33 / bar1_score 8 / bar2_score 9 / REWARD 1 / Q_MAX 3.196852e+00
TIMESTEP 61840 / e 33 / bar1_score 8 / bar2_score 10 / REWARD -1 / Q_MAX 1.141458e+00
TIMESTEP 61886 / e 33 / bar1_score 8 / bar2_score 11 / REWARD -1 / Q_MAX 6.324227e-01
TIMESTEP 61932 / e 33 / bar1_score 8 / bar2_score 12 / REWARD -1 / 

TIMESTEP 71976 / e 36 / bar1_score 5 / bar2_score 6 / REWARD 1 / Q_MAX 3.239430e+00
TIMESTEP 72105 / e 36 / bar1_score 5 / bar2_score 7 / REWARD -1 / Q_MAX 5.619459e-01
TIMESTEP 72234 / e 36 / bar1_score 6 / bar2_score 7 / REWARD 1 / Q_MAX 3.421689e+00
TIMESTEP 72363 / e 36 / bar1_score 6 / bar2_score 8 / REWARD -1 / Q_MAX 9.203556e-01
TIMESTEP 72492 / e 36 / bar1_score 7 / bar2_score 8 / REWARD 1 / Q_MAX 3.389413e+00
TIMESTEP 72621 / e 36 / bar1_score 7 / bar2_score 9 / REWARD -1 / Q_MAX 7.333338e-01
TIMESTEP 72750 / e 36 / bar1_score 8 / bar2_score 9 / REWARD 1 / Q_MAX 3.256601e+00
TIMESTEP 72879 / e 36 / bar1_score 8 / bar2_score 10 / REWARD -1 / Q_MAX 9.164343e-01
TIMESTEP 73008 / e 36 / bar1_score 9 / bar2_score 10 / REWARD 1 / Q_MAX 3.424196e+00
TIMESTEP 73137 / e 36 / bar1_score 9 / bar2_score 11 / REWARD -1 / Q_MAX 1.160551e+00
TIMESTEP 73266 / e 36 / bar1_score 10 / bar2_score 11 / REWARD 1 / Q_MAX 3.541979e+00
TIMESTEP 73395 / e 36 / bar1_score 10 / bar2_score 12 / REWARD -1 

TIMESTEP 83200 / e 39 / bar1_score 7 / bar2_score 9 / REWARD -1 / Q_MAX 1.454872e+00
TIMESTEP 83329 / e 39 / bar1_score 8 / bar2_score 9 / REWARD 1 / Q_MAX 3.783647e+00
TIMESTEP 83458 / e 39 / bar1_score 8 / bar2_score 10 / REWARD -1 / Q_MAX 2.900786e+00
TIMESTEP 83504 / e 39 / bar1_score 8 / bar2_score 11 / REWARD -1 / Q_MAX 2.641065e+00
TIMESTEP 83550 / e 39 / bar1_score 8 / bar2_score 12 / REWARD -1 / Q_MAX 3.074001e+00
TIMESTEP 83596 / e 39 / bar1_score 8 / bar2_score 13 / REWARD -1 / Q_MAX 2.609068e+00
TIMESTEP 83642 / e 39 / bar1_score 8 / bar2_score 14 / REWARD -1 / Q_MAX 2.391518e+00
TIMESTEP 83773 / e 39 / bar1_score 9 / bar2_score 14 / REWARD 1 / Q_MAX 3.627917e+00
TIMESTEP 83902 / e 39 / bar1_score 9 / bar2_score 15 / REWARD -1 / Q_MAX 2.074125e+00
TIMESTEP 84033 / e 39 / bar1_score 10 / bar2_score 15 / REWARD 1 / Q_MAX 3.631127e+00
TIMESTEP 84330 / e 39 / bar1_score 10 / bar2_score 16 / REWARD -1 / Q_MAX 2.727891e+00
TIMESTEP 84376 / e 39 / bar1_score 10 / bar2_score 17 / R

TIMESTEP 95466 / e 42 / bar1_score 2 / bar2_score 11 / REWARD -1 / Q_MAX 4.728926e-01
TIMESTEP 95512 / e 42 / bar1_score 2 / bar2_score 12 / REWARD -1 / Q_MAX 7.555043e-01
TIMESTEP 95725 / e 42 / bar1_score 2 / bar2_score 13 / REWARD -1 / Q_MAX 2.313141e-01
TIMESTEP 95856 / e 42 / bar1_score 3 / bar2_score 13 / REWARD 1 / Q_MAX 2.894815e+00
TIMESTEP 95985 / e 42 / bar1_score 3 / bar2_score 14 / REWARD -1 / Q_MAX 1.113667e+00
TIMESTEP 96114 / e 42 / bar1_score 4 / bar2_score 14 / REWARD 1 / Q_MAX 2.919585e+00
TIMESTEP 96243 / e 42 / bar1_score 4 / bar2_score 15 / REWARD -1 / Q_MAX 1.467926e+00
TIMESTEP 96372 / e 42 / bar1_score 5 / bar2_score 15 / REWARD 1 / Q_MAX 3.081886e+00
TIMESTEP 96501 / e 42 / bar1_score 5 / bar2_score 16 / REWARD -1 / Q_MAX 1.518672e+00
TIMESTEP 96632 / e 42 / bar1_score 6 / bar2_score 16 / REWARD 1 / Q_MAX 3.200587e+00
TIMESTEP 96761 / e 42 / bar1_score 6 / bar2_score 17 / REWARD -1 / Q_MAX 1.354666e+00
TIMESTEP 96892 / e 42 / bar1_score 7 / bar2_score 17 / REW

TIMESTEP 106724 / e 45 / bar1_score 5 / bar2_score 15 / REWARD -1 / Q_MAX 1.088247e+00
TIMESTEP 106770 / e 45 / bar1_score 5 / bar2_score 16 / REWARD -1 / Q_MAX 1.216044e+00
TIMESTEP 106983 / e 45 / bar1_score 5 / bar2_score 17 / REWARD -1 / Q_MAX 9.409724e-01
TIMESTEP 107114 / e 45 / bar1_score 6 / bar2_score 17 / REWARD 1 / Q_MAX 3.331401e+00
TIMESTEP 107243 / e 45 / bar1_score 6 / bar2_score 18 / REWARD -1 / Q_MAX 1.402972e+00
TIMESTEP 107289 / e 45 / bar1_score 6 / bar2_score 19 / REWARD -1 / Q_MAX 1.216388e+00
TIMESTEP 107502 / e 46 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 6.260771e-01
TIMESTEP 107631 / e 46 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 3.237744e+00
TIMESTEP 107760 / e 46 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX 1.096103e+00
TIMESTEP 107889 / e 46 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 3.240149e+00
TIMESTEP 108018 / e 46 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX 1.040309e+00
TIMESTEP 108147 / e 46 / bar1_score 3 / bar2_score 

TIMESTEP 119036 / e 48 / bar1_score 10 / bar2_score 14 / REWARD 1 / Q_MAX 3.253309e+00
TIMESTEP 119165 / e 48 / bar1_score 10 / bar2_score 15 / REWARD -1 / Q_MAX 8.852508e-01
TIMESTEP 119296 / e 48 / bar1_score 11 / bar2_score 15 / REWARD 1 / Q_MAX 3.100647e+00
TIMESTEP 119425 / e 48 / bar1_score 11 / bar2_score 16 / REWARD -1 / Q_MAX 6.839696e-01
TIMESTEP 119471 / e 48 / bar1_score 11 / bar2_score 17 / REWARD -1 / Q_MAX 7.452593e-01
TIMESTEP 119517 / e 48 / bar1_score 11 / bar2_score 18 / REWARD -1 / Q_MAX 4.016148e-01
TIMESTEP 119563 / e 48 / bar1_score 11 / bar2_score 19 / REWARD -1 / Q_MAX 2.327024e-01
TIMESTEP 119777 / e 49 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 1.234016e+00
TIMESTEP 119823 / e 49 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 2.273852e+00
TIMESTEP 119869 / e 49 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 1.307921e+00
TIMESTEP 119915 / e 49 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 1.324819e+00
TIMESTEP 119961 / e 49 / bar1_score 0 / ba

TIMESTEP 129999 / e 51 / bar1_score 10 / bar2_score 18 / REWARD -1 / Q_MAX 9.298524e-01
TIMESTEP 130128 / e 51 / bar1_score 11 / bar2_score 18 / REWARD 1 / Q_MAX 2.927960e+00
TIMESTEP 130257 / e 51 / bar1_score 11 / bar2_score 19 / REWARD -1 / Q_MAX 7.618401e-01
TIMESTEP 130386 / e 51 / bar1_score 12 / bar2_score 19 / REWARD 1 / Q_MAX 2.781916e+00
TIMESTEP 130515 / e 52 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 7.439036e-01
TIMESTEP 130646 / e 52 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 3.172365e+00
TIMESTEP 130775 / e 52 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX 1.237630e+00
TIMESTEP 130904 / e 52 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 3.022934e+00
TIMESTEP 131033 / e 52 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX 1.063170e+00
TIMESTEP 131162 / e 52 / bar1_score 3 / bar2_score 2 / REWARD 1 / Q_MAX 3.265649e+00
TIMESTEP 131291 / e 52 / bar1_score 3 / bar2_score 3 / REWARD -1 / Q_MAX 1.451986e+00
TIMESTEP 131420 / e 52 / bar1_score 4 / bar2_score 

TIMESTEP 141038 / e 54 / bar1_score 11 / bar2_score 18 / REWARD -1 / Q_MAX 5.542720e-01
TIMESTEP 141084 / e 54 / bar1_score 11 / bar2_score 19 / REWARD -1 / Q_MAX 3.889292e-01
TIMESTEP 141130 / e 55 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX 6.766859e-01
TIMESTEP 141259 / e 55 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 3.290192e+00
TIMESTEP 141388 / e 55 / bar1_score 1 / bar2_score 1 / REWARD -1 / Q_MAX 1.437187e+00
TIMESTEP 141517 / e 55 / bar1_score 2 / bar2_score 1 / REWARD 1 / Q_MAX 3.188587e+00
TIMESTEP 141646 / e 55 / bar1_score 2 / bar2_score 2 / REWARD -1 / Q_MAX 1.270293e+00
TIMESTEP 141775 / e 55 / bar1_score 3 / bar2_score 2 / REWARD 1 / Q_MAX 2.955966e+00
TIMESTEP 141904 / e 55 / bar1_score 3 / bar2_score 3 / REWARD -1 / Q_MAX 1.007172e+00
TIMESTEP 141950 / e 55 / bar1_score 3 / bar2_score 4 / REWARD -1 / Q_MAX 1.554188e+00
TIMESTEP 142164 / e 55 / bar1_score 3 / bar2_score 5 / REWARD -1 / Q_MAX 1.469324e+00
TIMESTEP 142295 / e 55 / bar1_score 4 / bar2_score 5 

TIMESTEP 154003 / e 57 / bar1_score 13 / bar2_score 12 / REWARD 1 / Q_MAX 2.830904e+00
TIMESTEP 154132 / e 57 / bar1_score 13 / bar2_score 13 / REWARD -1 / Q_MAX 8.928468e-01
TIMESTEP 154261 / e 57 / bar1_score 14 / bar2_score 13 / REWARD 1 / Q_MAX 2.880016e+00
TIMESTEP 154390 / e 57 / bar1_score 14 / bar2_score 14 / REWARD -1 / Q_MAX 7.539380e-01
TIMESTEP 154519 / e 57 / bar1_score 15 / bar2_score 14 / REWARD 1 / Q_MAX 2.512127e+00
TIMESTEP 154648 / e 57 / bar1_score 15 / bar2_score 15 / REWARD -1 / Q_MAX 4.745135e-01
TIMESTEP 154777 / e 57 / bar1_score 16 / bar2_score 15 / REWARD 1 / Q_MAX 2.326448e+00
TIMESTEP 154906 / e 57 / bar1_score 16 / bar2_score 16 / REWARD -1 / Q_MAX 3.798779e-01
TIMESTEP 155035 / e 57 / bar1_score 17 / bar2_score 16 / REWARD 1 / Q_MAX 2.567303e+00
TIMESTEP 155332 / e 57 / bar1_score 17 / bar2_score 17 / REWARD -1 / Q_MAX 3.741705e-01
TIMESTEP 155545 / e 57 / bar1_score 17 / bar2_score 18 / REWARD -1 / Q_MAX 5.503837e-01
TIMESTEP 155675 / e 57 / bar1_score 1

TIMESTEP 169992 / e 60 / bar1_score 8 / bar2_score 6 / REWARD -1 / Q_MAX 7.718576e-01
TIMESTEP 170289 / e 60 / bar1_score 9 / bar2_score 6 / REWARD 1 / Q_MAX 2.287212e+00
TIMESTEP 170586 / e 60 / bar1_score 9 / bar2_score 7 / REWARD -1 / Q_MAX 1.484897e+00
TIMESTEP 170802 / e 60 / bar1_score 9 / bar2_score 8 / REWARD -1 / Q_MAX 1.681418e+00
TIMESTEP 170931 / e 60 / bar1_score 10 / bar2_score 8 / REWARD 1 / Q_MAX 2.912874e+00
TIMESTEP 171060 / e 60 / bar1_score 10 / bar2_score 9 / REWARD -1 / Q_MAX 7.747676e-01
TIMESTEP 171189 / e 60 / bar1_score 11 / bar2_score 9 / REWARD 1 / Q_MAX 2.827750e+00
TIMESTEP 171318 / e 60 / bar1_score 11 / bar2_score 10 / REWARD -1 / Q_MAX 7.672310e-01
TIMESTEP 171447 / e 60 / bar1_score 12 / bar2_score 10 / REWARD 1 / Q_MAX 2.785249e+00
TIMESTEP 171576 / e 60 / bar1_score 12 / bar2_score 11 / REWARD -1 / Q_MAX 9.698753e-01
TIMESTEP 171622 / e 60 / bar1_score 12 / bar2_score 12 / REWARD -1 / Q_MAX 6.244224e-01
TIMESTEP 171668 / e 60 / bar1_score 12 / bar2_s

TIMESTEP 186837 / e 63 / bar1_score 3 / bar2_score 4 / REWARD 1 / Q_MAX 2.933042e+00
TIMESTEP 187134 / e 63 / bar1_score 3 / bar2_score 5 / REWARD -1 / Q_MAX 6.153691e-01
TIMESTEP 187432 / e 63 / bar1_score 4 / bar2_score 5 / REWARD 1 / Q_MAX 2.483433e+00
TIMESTEP 187561 / e 63 / bar1_score 4 / bar2_score 6 / REWARD -1 / Q_MAX 9.765352e-01
TIMESTEP 187690 / e 63 / bar1_score 5 / bar2_score 6 / REWARD 1 / Q_MAX 2.577076e+00
TIMESTEP 188071 / e 63 / bar1_score 6 / bar2_score 6 / REWARD 1 / Q_MAX 2.494866e+00
TIMESTEP 188454 / e 63 / bar1_score 7 / bar2_score 6 / REWARD 1 / Q_MAX 2.579216e+00
TIMESTEP 188835 / e 63 / bar1_score 8 / bar2_score 6 / REWARD 1 / Q_MAX 2.621633e+00
TIMESTEP 188964 / e 63 / bar1_score 8 / bar2_score 7 / REWARD -1 / Q_MAX 7.869551e-01
TIMESTEP 189093 / e 63 / bar1_score 9 / bar2_score 7 / REWARD 1 / Q_MAX 2.733888e+00
TIMESTEP 189391 / e 63 / bar1_score 9 / bar2_score 8 / REWARD -1 / Q_MAX 1.219913e+00
TIMESTEP 189604 / e 63 / bar1_score 9 / bar2_score 9 / REWARD

TIMESTEP 206692 / e 65 / bar1_score 14 / bar2_score 11 / REWARD -1 / Q_MAX 8.168675e-01
TIMESTEP 206989 / e 65 / bar1_score 15 / bar2_score 11 / REWARD 1 / Q_MAX 2.344151e+00
TIMESTEP 207118 / e 65 / bar1_score 15 / bar2_score 12 / REWARD -1 / Q_MAX 7.613841e-01
TIMESTEP 207164 / e 65 / bar1_score 15 / bar2_score 13 / REWARD -1 / Q_MAX 5.832847e-01
TIMESTEP 207210 / e 65 / bar1_score 15 / bar2_score 14 / REWARD -1 / Q_MAX 8.294547e-01
TIMESTEP 207341 / e 65 / bar1_score 16 / bar2_score 14 / REWARD 1 / Q_MAX 2.799772e+00
TIMESTEP 207470 / e 65 / bar1_score 16 / bar2_score 15 / REWARD -1 / Q_MAX 9.493324e-01
TIMESTEP 207599 / e 65 / bar1_score 17 / bar2_score 15 / REWARD 1 / Q_MAX 2.513367e+00
TIMESTEP 207896 / e 65 / bar1_score 17 / bar2_score 16 / REWARD -1 / Q_MAX 1.110100e+00
TIMESTEP 207942 / e 65 / bar1_score 17 / bar2_score 17 / REWARD -1 / Q_MAX 1.051729e+00
TIMESTEP 207988 / e 65 / bar1_score 17 / bar2_score 18 / REWARD -1 / Q_MAX 8.126732e-01
TIMESTEP 208201 / e 65 / bar1_score

TIMESTEP 226055 / e 68 / bar1_score 7 / bar2_score 2 / REWARD 1 / Q_MAX 2.486493e+00
TIMESTEP 226436 / e 68 / bar1_score 8 / bar2_score 2 / REWARD 1 / Q_MAX 2.343110e+00
TIMESTEP 226733 / e 68 / bar1_score 8 / bar2_score 3 / REWARD -1 / Q_MAX 3.899764e-01
TIMESTEP 227030 / e 68 / bar1_score 9 / bar2_score 3 / REWARD 1 / Q_MAX 2.509165e+00
TIMESTEP 227413 / e 68 / bar1_score 10 / bar2_score 3 / REWARD 1 / Q_MAX 2.570424e+00
TIMESTEP 227794 / e 68 / bar1_score 11 / bar2_score 3 / REWARD 1 / Q_MAX 2.088870e+00
TIMESTEP 228175 / e 68 / bar1_score 12 / bar2_score 3 / REWARD 1 / Q_MAX 2.147549e+00
TIMESTEP 228556 / e 68 / bar1_score 13 / bar2_score 3 / REWARD 1 / Q_MAX 2.345756e+00
TIMESTEP 228937 / e 68 / bar1_score 14 / bar2_score 3 / REWARD 1 / Q_MAX 2.228972e+00
TIMESTEP 229066 / e 68 / bar1_score 14 / bar2_score 4 / REWARD -1 / Q_MAX 8.863239e-01
TIMESTEP 229195 / e 68 / bar1_score 15 / bar2_score 4 / REWARD 1 / Q_MAX 2.565245e+00
TIMESTEP 229576 / e 68 / bar1_score 16 / bar2_score 4 / 

TIMESTEP 250760 / e 71 / bar1_score 7 / bar2_score 9 / REWARD 1 / Q_MAX 2.562182e+00
TIMESTEP 251143 / e 71 / bar1_score 8 / bar2_score 9 / REWARD 1 / Q_MAX 2.013152e+00
TIMESTEP 251440 / e 71 / bar1_score 8 / bar2_score 10 / REWARD -1 / Q_MAX 3.824911e-01
TIMESTEP 251486 / e 71 / bar1_score 8 / bar2_score 11 / REWARD -1 / Q_MAX 1.188230e+00
TIMESTEP 251615 / e 71 / bar1_score 9 / bar2_score 11 / REWARD 1 / Q_MAX 2.865708e+00
TIMESTEP 251913 / e 71 / bar1_score 9 / bar2_score 12 / REWARD -1 / Q_MAX 8.799164e-01
TIMESTEP 252210 / e 71 / bar1_score 10 / bar2_score 12 / REWARD 1 / Q_MAX 2.446252e+00
TIMESTEP 252591 / e 71 / bar1_score 11 / bar2_score 12 / REWARD 1 / Q_MAX 2.218732e+00
TIMESTEP 252973 / e 71 / bar1_score 12 / bar2_score 12 / REWARD 1 / Q_MAX 2.429676e+00
TIMESTEP 253270 / e 71 / bar1_score 12 / bar2_score 13 / REWARD -1 / Q_MAX 5.104742e-01
TIMESTEP 253484 / e 71 / bar1_score 12 / bar2_score 14 / REWARD -1 / Q_MAX 9.257485e-01
TIMESTEP 253613 / e 71 / bar1_score 13 / bar2_

TIMESTEP 274089 / e 74 / bar1_score 5 / bar2_score 2 / REWARD 1 / Q_MAX 2.693209e+00
TIMESTEP 274218 / e 74 / bar1_score 5 / bar2_score 3 / REWARD -1 / Q_MAX 1.761739e+00
TIMESTEP 274264 / e 74 / bar1_score 5 / bar2_score 4 / REWARD -1 / Q_MAX 9.417787e-01
TIMESTEP 274310 / e 74 / bar1_score 5 / bar2_score 5 / REWARD -1 / Q_MAX 6.968480e-01
TIMESTEP 274439 / e 74 / bar1_score 6 / bar2_score 5 / REWARD 1 / Q_MAX 2.912810e+00
TIMESTEP 274737 / e 74 / bar1_score 6 / bar2_score 6 / REWARD -1 / Q_MAX 7.758842e-01
TIMESTEP 274783 / e 74 / bar1_score 6 / bar2_score 7 / REWARD -1 / Q_MAX 1.387941e+00
TIMESTEP 274912 / e 74 / bar1_score 7 / bar2_score 7 / REWARD 1 / Q_MAX 3.133453e+00
TIMESTEP 275295 / e 74 / bar1_score 8 / bar2_score 7 / REWARD 1 / Q_MAX 3.016558e+00
TIMESTEP 275424 / e 74 / bar1_score 8 / bar2_score 8 / REWARD -1 / Q_MAX 1.115874e+00
TIMESTEP 275553 / e 74 / bar1_score 9 / bar2_score 8 / REWARD 1 / Q_MAX 3.105376e+00
TIMESTEP 275934 / e 74 / bar1_score 10 / bar2_score 8 / REW

TIMESTEP 295234 / e 77 / bar1_score 0 / bar2_score 0 / REWARD 1 / Q_MAX 2.201921e+00
TIMESTEP 295615 / e 77 / bar1_score 1 / bar2_score 0 / REWARD 1 / Q_MAX 2.233849e+00
TIMESTEP 295998 / e 77 / bar1_score 2 / bar2_score 0 / REWARD 1 / Q_MAX 2.289490e+00
TIMESTEP 296381 / e 77 / bar1_score 3 / bar2_score 0 / REWARD 1 / Q_MAX 2.154293e+00
TIMESTEP 296680 / e 77 / bar1_score 3 / bar2_score 1 / REWARD -1 / Q_MAX 9.552223e-01
TIMESTEP 296896 / e 77 / bar1_score 3 / bar2_score 2 / REWARD -1 / Q_MAX 3.649420e-01
TIMESTEP 297025 / e 77 / bar1_score 4 / bar2_score 2 / REWARD 1 / Q_MAX 3.027594e+00
TIMESTEP 297154 / e 77 / bar1_score 4 / bar2_score 3 / REWARD -1 / Q_MAX 1.392761e+00
TIMESTEP 297283 / e 77 / bar1_score 5 / bar2_score 3 / REWARD 1 / Q_MAX 2.454198e+00
TIMESTEP 297580 / e 77 / bar1_score 5 / bar2_score 4 / REWARD -1 / Q_MAX 4.626905e-02
TIMESTEP 297794 / e 77 / bar1_score 5 / bar2_score 5 / REWARD -1 / Q_MAX 3.571235e-01
TIMESTEP 297840 / e 77 / bar1_score 5 / bar2_score 6 / REWAR

TIMESTEP 314518 / e 79 / bar1_score 13 / bar2_score 9 / REWARD 1 / Q_MAX 2.196803e+00
TIMESTEP 314901 / e 79 / bar1_score 14 / bar2_score 9 / REWARD 1 / Q_MAX 2.493323e+00
TIMESTEP 315284 / e 79 / bar1_score 15 / bar2_score 9 / REWARD 1 / Q_MAX 2.247601e+00
TIMESTEP 315581 / e 79 / bar1_score 15 / bar2_score 10 / REWARD -1 / Q_MAX 1.330484e+00
TIMESTEP 315795 / e 79 / bar1_score 15 / bar2_score 11 / REWARD -1 / Q_MAX 3.654320e-01
TIMESTEP 315924 / e 79 / bar1_score 16 / bar2_score 11 / REWARD 1 / Q_MAX 2.559313e+00
TIMESTEP 316305 / e 79 / bar1_score 17 / bar2_score 11 / REWARD 1 / Q_MAX 2.476660e+00
TIMESTEP 316434 / e 79 / bar1_score 17 / bar2_score 12 / REWARD -1 / Q_MAX 1.064069e+00
TIMESTEP 316563 / e 79 / bar1_score 18 / bar2_score 12 / REWARD 1 / Q_MAX 2.354178e+00
TIMESTEP 316944 / e 79 / bar1_score 19 / bar2_score 12 / REWARD 1 / Q_MAX 2.442541e+00
TIMESTEP 317241 / e 79 / bar1_score 19 / bar2_score 13 / REWARD -1 / Q_MAX 3.142902e-01
TIMESTEP 317455 / e 79 / bar1_score 19 / b

In [None]:
Jai Gurudev