# Capsule Network for RL

In [1]:
import tensorflow as tf
import cv2
import sys
import time 
sys.path.append("game/")
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque
from matplotlib import pyplot as plt

pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Small constant added under the square root in squash() so the division
# never hits zero when a capsule vector has zero norm.
epsilon = 1e-9
# Number of routing iterations performed by routing().
iter_routing = 2
# Once the observation phase is over, trainNetwork() takes a gradient step
# only on timesteps that are a multiple of this value.
train_freq = 10

In [3]:
GAME = 'bird' # name of the game being played (used to label log/checkpoint files)
ACTIONS = 2 # number of valid actions (size of the one-hot action vector and of the Q-value output)
GAMMA = 0.99 # discount factor applied to the next state's best Q-value in the training target
OBSERVE = 1000 # timesteps of purely random play before any training step is taken
EXPLORE = 3000000 # number of timesteps over which the exploration rate is annealed linearly
FINAL_EPSILON = 0.0001 # exploration rate at which annealing stops
INITIAL_EPSILON = 0.1 # exploration rate at the start of training
REPLAY_MEMORY = 500000 # maximum number of transitions kept in the replay buffer
BATCH = 32 # number of transitions sampled per training step

# UPDATED CAPSULE NETWORK

In [4]:
def squash(vector):
    """Rescale ``vector`` by a norm-dependent factor along its second-to-last axis.

    With ``n`` the sum of squares of ``vector`` over axis -2 (kept as a size-1
    axis so it broadcasts), the result is
    ``vector * (n / (1 + n)) / sqrt(n + epsilon)``.
    The module-level ``epsilon`` keeps the square root strictly positive.

    Args:
        vector: tensor whose axis -2 holds the components of each capsule.

    Returns:
        A tensor with the same shape as ``vector``.
    """
    sq_norm = reduce_sum(tf.square(vector), -2, keepdims=True)
    # n / (1 + n): shrink factor derived from the squared norm.
    shrink = sq_norm / (1 + sq_norm)
    # Dividing by sqrt(n + epsilon) normalises the vector's length.
    unit_scale = shrink / tf.sqrt(sq_norm + epsilon)
    return unit_scale * vector
def routing(input, b_IJ):
    """Route 1024 input capsules (8-D) to 10 output capsules (16-D).

    Creates two variables with ``tf.get_variable``: ``'Weight'`` of shape
    (1, 1024, 160, 8, 1) and ``'bias'`` of shape (1, 1, 10, 16, 1). The number
    of routing iterations is read from the module-level ``iter_routing``.

    Args:
        input: input capsule tensor. It is tiled 160 times along axis 2 and then
            multiplied with ``W``, so it is expected to have size 1 on that axis;
            createNetwork passes a tensor reshaped to [batch, 1024, 1, 8, 1].
        b_IJ: routing logits, soft-maxed over axis 2 on every iteration.
            createNetwork passes a placeholder of shape (None, 1024, 10, 1, 1).

    Returns:
        ``v_J``, the squashed output capsules produced by the last iteration
        ([batch, 1, 10, 16, 1] according to the shapes used below).

    Note:
        If ``iter_routing`` is less than 1 the loop body never runs, ``v_J`` is
        never assigned and the final ``return`` fails.
    """
    # W: [1, num_caps_i, num_caps_j * len_v_j, len_u_j, 1]
    W = tf.get_variable('Weight', shape=(1, 1024, 160, 8, 1), dtype=tf.float32,
                        initializer=tf.random_normal_initializer(stddev=0.01))
    biases = tf.get_variable('bias', shape=(1, 1, 10, 16, 1))
    # The per-capsule matrix product is expressed as element-wise multiply,
    # reduce_sum and reshape instead of matmul: matmul [a, b] x [b, c] is
    # equivalent to element-wise multiply [a*c, b] * [a*c, b], reduce_sum at
    # axis=1 and reshape to [a, c].
    input = tf.tile(input, [1, 1, 160, 1, 1])
    # expected shape after tiling: [batch_size, 1024, 160, 8, 1]

    # Sum over the 8 input components (axis 3), then split the 160 outputs
    # into 10 capsules of 16 components each.
    u_hat = reduce_sum(W * input, axis=3, keepdims=True)
    u_hat = tf.reshape(u_hat, shape=[-1, 1024, 10, 16, 1])
    # expected shape: [batch_size, 1024, 10, 16, 1]

    # In forward, u_hat_stopped = u_hat; in backward, no gradient is passed back from u_hat_stopped to u_hat
    u_hat_stopped = tf.stop_gradient(u_hat, name='stop_gradient')

    # line 3: for r iterations do
    for r_iter in range(iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # line 4: coupling coefficients, softmax over the 10 output capsules
            # => [batch_size, 1024, 10, 1, 1]
            c_IJ = softmax(b_IJ, axis=2)

            # At the last iteration, use `u_hat` in order to receive gradients from the following graph
            if r_iter == iter_routing - 1:
                # line 5:
                # weighting u_hat with c_IJ, element-wise in the last two dims
                # => [batch_size, 1024, 10, 16, 1]
                s_J = tf.multiply(c_IJ, u_hat)
                # then sum over axis 1 (the 1024 input capsules), resulting in [batch_size, 1, 10, 16, 1]
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                # expected shape: [batch_size, 1, 10, 16, 1]

                # line 6:
                # squash using Eq.1,
                v_J = squash(s_J)
                # expected shape: [batch_size, 1, 10, 16, 1]
            elif r_iter < iter_routing - 1:  # Inner iterations: computed on u_hat_stopped, so no gradient flows through them
                s_J = tf.multiply(c_IJ, u_hat_stopped)
                s_J = reduce_sum(s_J, axis=1, keepdims=True) + biases
                v_J = squash(s_J)

                # line 7:
                # tile v_J from [batch_size, 1, 10, 16, 1] to [batch_size, 1024, 10, 16, 1],
                # then take the dot product with u_hat over the 16 components (axis 3),
                # resulting in [batch_size, 1024, 10, 1, 1]. The agreement is kept per
                # sample; it is NOT reduced over the batch dimension.
                v_J_tiled = tf.tile(v_J, [1, 1024, 1, 1, 1])
                u_produce_v = reduce_sum(u_hat_stopped * v_J_tiled, axis=3, keepdims=True)
                # expected shape: [batch_size, 1024, 10, 1, 1]

                # Add the agreement to the routing logits used by the next iteration.
                b_IJ += u_produce_v
    return(v_J)
# Thin wrapper kept so call sites stay the same across TensorFlow versions
def reduce_sum(input_tensor, axis=None, keepdims=False):
    """Sum ``input_tensor`` over ``axis`` by delegating to ``tf.reduce_sum``.

    Args:
        input_tensor: tensor to reduce.
        axis: axis (or axes) to sum over; ``None`` is forwarded unchanged.
        keepdims: forwarded unchanged to ``tf.reduce_sum``.

    Returns:
        The tensor returned by ``tf.reduce_sum``.
    """
    summed = tf.reduce_sum(input_tensor, axis=axis, keepdims=keepdims)
    return summed
# Thin wrapper kept so call sites stay the same across TensorFlow versions
def softmax(logits, axis=None):
    """Apply ``tf.nn.softmax`` to ``logits`` along ``axis``.

    Args:
        logits: tensor of unnormalised scores.
        axis: axis to normalise over; ``None`` is forwarded unchanged.

    Returns:
        The tensor returned by ``tf.nn.softmax``.
    """
    probabilities = tf.nn.softmax(logits, axis=axis)
    return probabilities

In [5]:
def createNetwork():
    """Build the capsule Q-network graph.

    Pipeline: 8x8/stride-4 convolution -> 6x6/stride-2 convolution reshaped into
    1024 capsules of 8 components -> squash -> routing() to 10 capsules of 16
    components -> linear layer with ACTIONS outputs.

    Returns:
        A tuple ``(s, coeff, readout)``:
        s: float placeholder for the stacked frames, shape [None, 84, 84, 4].
        coeff: float32 placeholder for the routing logits fed to routing(),
            shape (None, 1024, 10, 1, 1).
        readout: output of the final linear layer, one value per action.
    """
    # Graph inputs.
    s = tf.placeholder("float", [None, 84, 84, 4])
    coeff = tf.placeholder(tf.float32, shape=(None, 1024, 10, 1, 1))

    # First convolution: 8x8 kernels, stride 4, VALID padding, 64 filters.
    kernel_init = tf.random_normal_initializer(0., 0.01)
    bias_init = tf.constant_initializer(0.01)
    w1 = tf.get_variable('w1', [8, 8, 4, 64], initializer=kernel_init)
    b1 = tf.get_variable('b1', [64], initializer=bias_init)
    pre_activation = tf.nn.bias_add(
        tf.nn.conv2d(s, w1, strides=[1, 4, 4, 1], padding="VALID"), b1)
    # Reshaped to [batch_size, 20, 20, 64].
    conv1 = tf.reshape(tf.nn.relu(pre_activation), [-1, 20, 20, 64])

    # Second convolution: 16 * 8 = 128 filters, 6x6 kernels, stride 2.
    primary = tf.contrib.layers.conv2d(
        conv1, 16 * 8, kernel_size=6, stride=2, padding="VALID",
        activation_fn=tf.nn.relu,
        weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
        biases_initializer=tf.constant_initializer(0))

    # Regroup the feature maps as (batch_size, 1024, 8, 1): 1024 capsules with
    # 8 components each, then squash every capsule.
    primary = squash(tf.reshape(primary, (-1, 1024, 8, 1)))

    # Insert a singleton axis for the output capsules before routing.
    capsule_dim = primary.shape[-2].value
    routed = routing(
        tf.reshape(primary, shape=(-1, 1024, 1, capsule_dim, 1)), coeff)

    # Flatten the routed capsules to 160 features and map them linearly to
    # one value per action.
    flat = tf.reshape(routed, shape=(-1, 160))
    readout = tf.contrib.layers.fully_connected(
        flat, num_outputs=ACTIONS, activation_fn=None)
    return s, coeff, readout

In [6]:
def _preprocess_frame(frame):
    """Crop, resize to 84x84, convert to grayscale and binarise one raw game frame."""
    cropped = frame[:, :-110, :]
    gray = cv2.cvtColor(cv2.resize(cropped, (84, 84)), cv2.COLOR_BGR2GRAY)
    # Every pixel brighter than 1 becomes 255, everything else 0.
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    return binary


def trainNetwork(s, coeff, readout, sess):
    """Play the game with an epsilon-greedy policy and train the Q-network.

    The loop runs until more than 50 points are scored within one episode.
    For the first OBSERVE timesteps actions are purely random and no training
    happens; afterwards one Adam step is taken every ``train_freq`` timesteps
    on a minibatch of BATCH transitions sampled from the replay buffer.

    Args:
        s: placeholder for the stacked input frames (from createNetwork).
        coeff: placeholder for the routing logits (from createNetwork).
        readout: tensor with one Q-value per action (from createNetwork).
        sess: TensorFlow session used for every graph evaluation. It is passed
            explicitly to each call, so it does not need to be the default
            session.

    Returns:
        None. A progress line is printed at the end of every episode and the
        elapsed time is printed when the loop stops.
    """
    # Cost: mean squared error between the target y and the Q-value of the
    # action that was actually taken (selected by the one-hot vector a).
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), axis=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cost)

    # Open up a game state to communicate with the emulator.
    game_state = game.GameState()

    # Replay memory; maxlen makes the deque drop its oldest transition by
    # itself once REPLAY_MEMORY entries are stored.
    D = deque(maxlen=REPLAY_MEMORY)

    # Get the first state by doing nothing, and stack the same frame 4 times.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    first_frame, _, terminal = game_state.frame_step(do_nothing)
    x_t = _preprocess_frame(first_frame)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # Initialise all variables once, after the optimizer created its slots.
    sess.run(tf.global_variables_initializer())

    # Zero routing logits for single-state inference and for minibatches.
    b_IJ1 = np.zeros((1, 1024, 10, 1, 1), dtype=np.float32)
    b_IJ2 = np.zeros((BATCH, 1024, 10, 1, 1), dtype=np.float32)

    # Exploration rate; deliberately not called `epsilon`, which is the
    # module-level numerical constant used by squash().
    explore_eps = INITIAL_EPSILON
    t = 0
    pscore = 0
    loss = 0
    action_freq = np.zeros(ACTIONS)
    tick = time.time()
    while True:
        # Choose an action epsilon-greedily.
        readout_t = sess.run(readout, feed_dict={s: s_t.reshape((1, 84, 84, 4)), coeff: b_IJ1})

        a_t = np.zeros([ACTIONS])
        if random.random() <= explore_eps or t <= OBSERVE:
            action_index = random.randrange(ACTIONS)
        else:
            action_index = np.argmax(readout_t)
        a_t[action_index] = 1

        # Scale down the exploration rate once the observation phase is over.
        if explore_eps > FINAL_EPSILON and t > OBSERVE:
            explore_eps -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # Run the selected action and observe the next frame and reward.
        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)
        x_t1 = np.reshape(_preprocess_frame(x_t1_colored), (84, 84, 1))
        # Newest frame first, followed by the three most recent previous frames.
        s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)
        action_freq += a_t

        # Store the transition in the replay memory.
        D.append((s_t, a_t, r_t, s_t1, terminal))

        # Only train if done observing.
        if t > OBSERVE and t % train_freq == 0:
            # Sample a minibatch to train on.
            minibatch = random.sample(D, BATCH)
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            r_batch = [d[2] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]
            terminal_batch = [d[4] for d in minibatch]

            # Target: the reward alone for terminal transitions, otherwise
            # reward plus the discounted best Q-value of the next state.
            readout_j1_batch = sess.run(readout, feed_dict={s: s_j1_batch, coeff: b_IJ2})
            y_batch = [
                reward if done else reward + GAMMA * np.max(q_next)
                for reward, done, q_next in zip(r_batch, terminal_batch, readout_j1_batch)
            ]

            # Gradient step; the loss is fetched in the same run, so logging
            # it does not cost a second forward pass through the network.
            _, loss = sess.run([train_step, cost], feed_dict={
                y: y_batch,
                a: a_batch,
                s: s_j_batch,
                coeff: b_IJ2})

        # Update the old values.
        s_t = s_t1
        t += 1

        if r_t == 1:
            pscore += 1
        if terminal:
            # End of episode: report, then reset the per-episode counters.
            print("ts", t, "action_freq", action_freq, "/e", round(explore_eps, 3), "/pscore", pscore, "/loss", loss, "/ Q_MAX %e" % np.max(readout_t))
            pscore = 0
            action_freq = np.zeros(ACTIONS)
        if pscore > 50:
            print("Game_Ends_in Time:", int(time.time() - tick))
            break

In [7]:
def playGame():
    """Build a fresh graph and session, then run the training loop.

    The default graph is reset first, so calling this again (for example by
    re-running the cell) starts from an empty graph. The session is always
    closed when training stops, whether it finishes normally or raises.
    """
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    try:
        s, coeff, readout = createNetwork()
        trainNetwork(s, coeff, readout, sess)
    finally:
        # Without an explicit close the session stays open after this
        # function returns.
        sess.close()

In [None]:
def main():
    """Entry point: delegate to playGame(), which builds the network and trains it."""
    playGame()

if __name__ == "__main__":
    # Time the whole run (graph construction plus training) and report it.
    tick = time.time()
    main()
    print("Game_Ends_in Time:",int(time.time() - tick))
    print("____________ END HERE _____________")

ts 49 action_freq [24. 25.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 3.078833e-03
ts 99 action_freq [28. 22.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 5.409284e-03
ts 149 action_freq [21. 29.] /e 0.1 /pscore 0 /loss 0 / Q_MAX -1.006978e-02
ts 199 action_freq [27. 23.] /e 0.1 /pscore 0 /loss 0 / Q_MAX -3.862311e-04
ts 249 action_freq [25. 25.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 2.676354e-02
ts 299 action_freq [26. 24.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 3.166082e-03
ts 349 action_freq [27. 23.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 2.676354e-02
ts 399 action_freq [24. 26.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 5.762247e-03
ts 449 action_freq [26. 24.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 1.511778e-02
ts 499 action_freq [25. 25.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 1.438736e-02
ts 549 action_freq [20. 30.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 9.572075e-03
ts 599 action_freq [24. 26.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 7.844073e-03
ts 649 action_freq [23. 27.] /e 0.1 /pscore 0 /loss 0 / Q_MAX 6.105557e-03
ts 699 action_freq [31. 1

ts 4928 action_freq [24. 26.] /e 0.1 /pscore 0 /loss 0.014120961 / Q_MAX -8.102278e-01
ts 4978 action_freq [14. 36.] /e 0.1 /pscore 0 /loss 0.024618758 / Q_MAX -8.924992e-01
ts 5038 action_freq [43. 17.] /e 0.1 /pscore 0 /loss 0.019017983 / Q_MAX 3.485318e+00
ts 5088 action_freq [31. 19.] /e 0.1 /pscore 0 /loss 0.018482523 / Q_MAX -3.293853e-01
ts 5138 action_freq [15. 35.] /e 0.1 /pscore 0 /loss 0.010801971 / Q_MAX -1.504183e+00
ts 5188 action_freq [29. 21.] /e 0.1 /pscore 0 /loss 0.021926899 / Q_MAX -1.516215e+00
ts 5238 action_freq [31. 19.] /e 0.1 /pscore 0 /loss 0.009256147 / Q_MAX -1.343125e+00
ts 5296 action_freq [41. 17.] /e 0.1 /pscore 0 /loss 0.012469083 / Q_MAX 3.755968e+00
ts 5346 action_freq [20. 30.] /e 0.1 /pscore 0 /loss 0.028921323 / Q_MAX -3.937433e-01
ts 5396 action_freq [17. 33.] /e 0.1 /pscore 0 /loss 0.008026065 / Q_MAX -8.829466e-01
ts 5446 action_freq [37. 13.] /e 0.1 /pscore 0 /loss 0.013610445 / Q_MAX -7.387761e-01
ts 5496 action_freq [40. 10.] /e 0.1 /pscore 

ts 10561 action_freq [43.  7.] /e 0.1 /pscore 0 /loss 0.045064136 / Q_MAX -3.452866e-01
ts 10611 action_freq [43.  7.] /e 0.1 /pscore 0 /loss 0.20621456 / Q_MAX -8.050400e-01
ts 10734 action_freq [109.  14.] /e 0.1 /pscore 2 /loss 0.276666 / Q_MAX -1.335990e+00
ts 10784 action_freq [32. 18.] /e 0.1 /pscore 0 /loss 0.029483171 / Q_MAX -2.466986e+00
ts 10834 action_freq [40. 10.] /e 0.1 /pscore 0 /loss 0.08584501 / Q_MAX -9.593888e-01
ts 10884 action_freq [28. 22.] /e 0.1 /pscore 0 /loss 0.5191419 / Q_MAX -1.359167e+00
ts 10934 action_freq [42.  8.] /e 0.1 /pscore 0 /loss 0.047883153 / Q_MAX -6.036069e-01
ts 11020 action_freq [71. 15.] /e 0.1 /pscore 1 /loss 0.025402404 / Q_MAX 5.760378e-01
ts 11070 action_freq [34. 16.] /e 0.1 /pscore 0 /loss 0.07222192 / Q_MAX 7.550623e-02
ts 11194 action_freq [106.  18.] /e 0.1 /pscore 2 /loss 0.13397548 / Q_MAX 2.658289e+00
ts 11244 action_freq [39. 11.] /e 0.1 /pscore 0 /loss 0.06313094 / Q_MAX 7.028741e-01
ts 11370 action_freq [110.  16.] /e 0.1 /p

ts 18290 action_freq [39. 11.] /e 0.099 /pscore 0 /loss 0.11991741 / Q_MAX -1.038910e+00
ts 18340 action_freq [44.  6.] /e 0.099 /pscore 0 /loss 0.055699352 / Q_MAX -6.453981e-01
ts 18399 action_freq [51.  8.] /e 0.099 /pscore 0 /loss 0.19137338 / Q_MAX -6.795680e-01
ts 18449 action_freq [44.  6.] /e 0.099 /pscore 0 /loss 0.14094645 / Q_MAX 6.812573e-01
ts 18536 action_freq [71. 16.] /e 0.099 /pscore 1 /loss 0.27790856 / Q_MAX 5.831531e-02
ts 18628 action_freq [79. 13.] /e 0.099 /pscore 1 /loss 0.19171111 / Q_MAX -1.216308e+00
ts 18726 action_freq [88. 10.] /e 0.099 /pscore 2 /loss 0.058988225 / Q_MAX 2.344613e+00
ts 18815 action_freq [79. 10.] /e 0.099 /pscore 1 /loss 0.06456471 / Q_MAX 7.866440e-01
ts 18867 action_freq [43.  9.] /e 0.099 /pscore 0 /loss 0.04498833 / Q_MAX 2.582504e+00
ts 19028 action_freq [143.  18.] /e 0.099 /pscore 3 /loss 0.06537008 / Q_MAX 2.621558e+00
ts 19079 action_freq [43.  8.] /e 0.099 /pscore 0 /loss 0.040771015 / Q_MAX -5.035661e-01
ts 19129 action_freq [

ts 26964 action_freq [127.  14.] /e 0.099 /pscore 3 /loss 0.24857865 / Q_MAX 4.862977e+00
ts 27014 action_freq [43.  7.] /e 0.099 /pscore 0 /loss 0.12485136 / Q_MAX 1.326251e+00
ts 27101 action_freq [80.  7.] /e 0.099 /pscore 1 /loss 0.100683466 / Q_MAX 2.894945e+00
ts 27188 action_freq [81.  6.] /e 0.099 /pscore 1 /loss 0.060982704 / Q_MAX 3.982066e+00
ts 27348 action_freq [142.  18.] /e 0.099 /pscore 3 /loss 0.5865413 / Q_MAX 2.853079e-01
ts 27492 action_freq [132.  12.] /e 0.099 /pscore 3 /loss 0.12996276 / Q_MAX 3.893101e+00
ts 27542 action_freq [44.  6.] /e 0.099 /pscore 0 /loss 0.057705358 / Q_MAX -1.357237e+00
ts 27592 action_freq [42.  8.] /e 0.099 /pscore 0 /loss 0.26413748 / Q_MAX -2.532436e+00
ts 27727 action_freq [122.  13.] /e 0.099 /pscore 2 /loss 0.07635939 / Q_MAX -4.939063e-01
ts 27864 action_freq [125.  12.] /e 0.099 /pscore 3 /loss 0.05110716 / Q_MAX -3.477673e-01
ts 27950 action_freq [76. 10.] /e 0.099 /pscore 1 /loss 0.120582275 / Q_MAX 8.978046e-01
ts 28088 action

ts 35946 action_freq [43.  7.] /e 0.099 /pscore 0 /loss 0.071790926 / Q_MAX -1.134769e+00
ts 36107 action_freq [150.  11.] /e 0.099 /pscore 3 /loss 0.121717334 / Q_MAX 2.199645e+00
ts 36174 action_freq [60.  7.] /e 0.099 /pscore 1 /loss 0.08684179 / Q_MAX 2.534642e+00
ts 36260 action_freq [78.  8.] /e 0.099 /pscore 1 /loss 0.12497333 / Q_MAX 7.585861e-01
ts 36310 action_freq [46.  4.] /e 0.099 /pscore 0 /loss 0.10535389 / Q_MAX 3.494999e-01
ts 36515 action_freq [187.  18.] /e 0.099 /pscore 4 /loss 0.13637888 / Q_MAX 6.943103e+00
ts 36656 action_freq [128.  13.] /e 0.099 /pscore 3 /loss 0.1766319 / Q_MAX 4.946415e+00
ts 36745 action_freq [80.  9.] /e 0.099 /pscore 1 /loss 0.14815083 / Q_MAX -4.818488e-01
ts 36849 action_freq [96.  8.] /e 0.099 /pscore 2 /loss 0.09907711 / Q_MAX 1.008347e+00
ts 36973 action_freq [115.   9.] /e 0.099 /pscore 2 /loss 0.14338626 / Q_MAX 2.527225e+00
ts 37023 action_freq [43.  7.] /e 0.099 /pscore 0 /loss 0.41432542 / Q_MAX -4.598785e-01
ts 37154 action_freq

ts 45540 action_freq [142.  18.] /e 0.099 /pscore 3 /loss 0.21320125 / Q_MAX -6.301595e-01
ts 45626 action_freq [75. 11.] /e 0.099 /pscore 1 /loss 0.22959499 / Q_MAX -1.855068e+00
ts 45835 action_freq [194.  15.] /e 0.099 /pscore 4 /loss 0.2768275 / Q_MAX 9.743516e-01
ts 45885 action_freq [43.  7.] /e 0.099 /pscore 0 /loss 0.092143595 / Q_MAX -3.560907e+00
ts 45935 action_freq [43.  7.] /e 0.099 /pscore 0 /loss 0.18030582 / Q_MAX -1.383456e+00
ts 45985 action_freq [45.  5.] /e 0.099 /pscore 0 /loss 0.24610287 / Q_MAX -1.101293e+00
ts 46108 action_freq [107.  16.] /e 0.098 /pscore 2 /loss 0.097059324 / Q_MAX -1.541355e+00
ts 46158 action_freq [22. 28.] /e 0.098 /pscore 0 /loss 0.161401 / Q_MAX -2.657917e+00
ts 46282 action_freq [111.  13.] /e 0.098 /pscore 2 /loss 0.17136908 / Q_MAX -4.753527e-01
ts 46333 action_freq [44.  7.] /e 0.098 /pscore 0 /loss 0.19935298 / Q_MAX 7.240454e-01
ts 46384 action_freq [42.  9.] /e 0.098 /pscore 0 /loss 0.6829633 / Q_MAX -2.888042e+00
ts 46434 action_f

ts 53800 action_freq [79.  7.] /e 0.098 /pscore 1 /loss 0.15378241 / Q_MAX -4.057664e-01
ts 53886 action_freq [78.  8.] /e 0.098 /pscore 1 /loss 0.08616575 / Q_MAX 2.913186e-01
ts 53953 action_freq [61.  6.] /e 0.098 /pscore 1 /loss 0.38315374 / Q_MAX 3.867064e+00
ts 54056 action_freq [94.  9.] /e 0.098 /pscore 2 /loss 1.1429173 / Q_MAX 5.671486e+00
ts 54106 action_freq [44.  6.] /e 0.098 /pscore 0 /loss 0.08228364 / Q_MAX -1.713014e+00
ts 54229 action_freq [112.  11.] /e 0.098 /pscore 2 /loss 0.23676339 / Q_MAX 4.585016e-01
ts 54284 action_freq [49.  6.] /e 0.098 /pscore 0 /loss 1.4164602 / Q_MAX -4.524915e+00
ts 54334 action_freq [42.  8.] /e 0.098 /pscore 0 /loss 0.12551016 / Q_MAX -2.575565e+00
ts 54398 action_freq [57.  7.] /e 0.098 /pscore 1 /loss 0.06600796 / Q_MAX -2.643019e+00
ts 54533 action_freq [124.  11.] /e 0.098 /pscore 2 /loss 0.06378727 / Q_MAX -3.618335e-01
ts 54620 action_freq [78.  9.] /e 0.098 /pscore 1 /loss 0.07152249 / Q_MAX 4.530812e+00
ts 54709 action_freq [82

ts 61856 action_freq [75. 11.] /e 0.098 /pscore 1 /loss 0.14154208 / Q_MAX -8.395364e-01
ts 61910 action_freq [47.  7.] /e 0.098 /pscore 0 /loss 0.0774481 / Q_MAX 1.332715e+00
ts 61960 action_freq [38. 12.] /e 0.098 /pscore 0 /loss 0.07673085 / Q_MAX -1.267617e+00
ts 62200 action_freq [219.  21.] /e 0.098 /pscore 5 /loss 0.144092 / Q_MAX 7.296845e-01
ts 62257 action_freq [43. 14.] /e 0.098 /pscore 0 /loss 0.33055043 / Q_MAX 5.597706e-01
ts 62418 action_freq [143.  18.] /e 0.098 /pscore 3 /loss 0.11383361 / Q_MAX -3.887526e-01
ts 62474 action_freq [48.  8.] /e 0.098 /pscore 0 /loss 1.036536 / Q_MAX -2.993407e+00
ts 62525 action_freq [47.  4.] /e 0.098 /pscore 0 /loss 0.115681544 / Q_MAX -6.041161e-01
ts 62583 action_freq [53.  5.] /e 0.098 /pscore 0 /loss 0.23952854 / Q_MAX 8.915227e+00
ts 62832 action_freq [231.  18.] /e 0.098 /pscore 6 /loss 0.37018776 / Q_MAX 2.817147e+00
ts 63194 action_freq [334.  28.] /e 0.098 /pscore 9 /loss 0.113454804 / Q_MAX -1.029224e+00
ts 63244 action_freq 

ts 71185 action_freq [96.  9.] /e 0.098 /pscore 2 /loss 0.13922042 / Q_MAX 9.490710e+00
ts 71247 action_freq [57.  5.] /e 0.098 /pscore 1 /loss 0.32280633 / Q_MAX 7.014517e-01
ts 71371 action_freq [111.  13.] /e 0.098 /pscore 2 /loss 0.056271274 / Q_MAX -1.805355e-01
ts 71458 action_freq [76. 11.] /e 0.098 /pscore 1 /loss 0.119569965 / Q_MAX 2.858418e-01
ts 71523 action_freq [52. 13.] /e 0.098 /pscore 1 /loss 0.2744154 / Q_MAX -1.353838e-01
ts 71573 action_freq [33. 17.] /e 0.098 /pscore 0 /loss 0.22106771 / Q_MAX -1.441899e+00
ts 71676 action_freq [91. 12.] /e 0.098 /pscore 2 /loss 0.15248169 / Q_MAX 6.613518e+00
ts 71726 action_freq [40. 10.] /e 0.098 /pscore 0 /loss 0.43758076 / Q_MAX -1.128190e+00
ts 71793 action_freq [58.  9.] /e 0.098 /pscore 1 /loss 0.085448265 / Q_MAX 1.080974e+01
ts 71843 action_freq [35. 15.] /e 0.098 /pscore 0 /loss 0.11212995 / Q_MAX -1.406720e+00
ts 71944 action_freq [91. 10.] /e 0.098 /pscore 2 /loss 0.15124479 / Q_MAX 6.425413e+00
ts 71994 action_freq [4

ts 79988 action_freq [109.  14.] /e 0.097 /pscore 2 /loss 0.28463787 / Q_MAX -5.276225e-01
ts 80038 action_freq [44.  6.] /e 0.097 /pscore 0 /loss 0.1267272 / Q_MAX 3.134356e+00
ts 80088 action_freq [44.  6.] /e 0.097 /pscore 0 /loss 0.17195083 / Q_MAX -6.237822e-01
ts 80396 action_freq [284.  24.] /e 0.097 /pscore 7 /loss 0.2380892 / Q_MAX 5.781087e-02
ts 80462 action_freq [55. 11.] /e 0.097 /pscore 1 /loss 0.14758947 / Q_MAX -7.390038e-01
ts 80526 action_freq [52. 12.] /e 0.097 /pscore 1 /loss 0.2585503 / Q_MAX -8.595489e-01
ts 80612 action_freq [74. 12.] /e 0.097 /pscore 1 /loss 0.25391185 / Q_MAX -1.192342e+00
ts 80664 action_freq [46.  6.] /e 0.097 /pscore 0 /loss 0.09755857 / Q_MAX 3.746207e+00
ts 80788 action_freq [114.  10.] /e 0.097 /pscore 2 /loss 0.21104163 / Q_MAX -1.470038e+00
ts 80838 action_freq [44.  6.] /e 0.097 /pscore 0 /loss 0.2205413 / Q_MAX -1.104695e+00
ts 81054 action_freq [198.  18.] /e 0.097 /pscore 5 /loss 0.23236029 / Q_MAX -2.733563e-01
ts 81178 action_freq

ts 90082 action_freq [32. 18.] /e 0.097 /pscore 0 /loss 0.118169524 / Q_MAX -1.135653e+00
ts 90168 action_freq [75. 11.] /e 0.097 /pscore 1 /loss 0.2198015 / Q_MAX -7.916407e-01
ts 90375 action_freq [180.  27.] /e 0.097 /pscore 4 /loss 0.07573198 / Q_MAX -2.993445e-01
ts 90425 action_freq [37. 13.] /e 0.097 /pscore 0 /loss 0.039246425 / Q_MAX -1.848521e+00
ts 90514 action_freq [71. 18.] /e 0.097 /pscore 1 /loss 0.03604478 / Q_MAX -4.925484e-01
ts 90567 action_freq [40. 13.] /e 0.097 /pscore 0 /loss 0.2110942 / Q_MAX 3.486208e-01
ts 90617 action_freq [36. 14.] /e 0.097 /pscore 0 /loss 0.09649365 / Q_MAX -1.047006e+00
ts 90667 action_freq [36. 14.] /e 0.097 /pscore 0 /loss 0.6169791 / Q_MAX 1.252602e+01
ts 90717 action_freq [42.  8.] /e 0.097 /pscore 0 /loss 0.1992409 / Q_MAX -1.379221e+00
ts 90778 action_freq [48. 13.] /e 0.097 /pscore 0 /loss 3.3389792 / Q_MAX 3.796146e+00
ts 90828 action_freq [37. 13.] /e 0.097 /pscore 0 /loss 0.07778564 / Q_MAX -1.068905e+00
ts 90895 action_freq [55.

ts 99470 action_freq [34. 16.] /e 0.097 /pscore 0 /loss 0.5413803 / Q_MAX -8.943783e-01
ts 99520 action_freq [39. 11.] /e 0.097 /pscore 0 /loss 0.08766623 / Q_MAX -9.800223e-01
ts 99583 action_freq [44. 19.] /e 0.097 /pscore 1 /loss 0.11504561 / Q_MAX -9.030959e-01
ts 99633 action_freq [29. 21.] /e 0.097 /pscore 0 /loss 0.19684808 / Q_MAX -9.850302e-01
ts 99719 action_freq [74. 12.] /e 0.097 /pscore 1 /loss 0.07123908 / Q_MAX -9.654980e-01
ts 99769 action_freq [32. 18.] /e 0.097 /pscore 0 /loss 0.091383345 / Q_MAX -5.585929e-01
ts 99940 action_freq [145.  26.] /e 0.097 /pscore 3 /loss 0.116880275 / Q_MAX 9.142361e-01
ts 100007 action_freq [55. 12.] /e 0.097 /pscore 1 /loss 0.23781873 / Q_MAX 2.477976e+00
ts 100057 action_freq [38. 12.] /e 0.097 /pscore 0 /loss 0.3861792 / Q_MAX -4.673117e-01
ts 100181 action_freq [114.  10.] /e 0.097 /pscore 2 /loss 0.10203876 / Q_MAX 1.060406e+00
ts 100348 action_freq [154.  13.] /e 0.097 /pscore 3 /loss 0.042877886 / Q_MAX 2.628257e+00
ts 100398 acti

ts 107860 action_freq [110.  14.] /e 0.096 /pscore 2 /loss 0.15770847 / Q_MAX 3.075609e+00
ts 107987 action_freq [116.  11.] /e 0.096 /pscore 2 /loss 0.17787564 / Q_MAX 1.213031e+00
ts 108038 action_freq [46.  5.] /e 0.096 /pscore 0 /loss 0.053394645 / Q_MAX 2.869766e-01
ts 108124 action_freq [69. 17.] /e 0.096 /pscore 1 /loss 0.4141847 / Q_MAX 3.069160e-01
ts 108191 action_freq [53. 14.] /e 0.096 /pscore 1 /loss 0.17095897 / Q_MAX 1.279314e+00
ts 108246 action_freq [40. 15.] /e 0.096 /pscore 0 /loss 0.15586218 / Q_MAX -3.297977e-01
ts 108370 action_freq [108.  16.] /e 0.096 /pscore 2 /loss 1.8333228 / Q_MAX 1.312939e+00
ts 108425 action_freq [39. 16.] /e 0.096 /pscore 0 /loss 0.14042123 / Q_MAX -2.541508e+00
ts 108594 action_freq [150.  19.] /e 0.096 /pscore 3 /loss 0.26338682 / Q_MAX -1.206952e+00
ts 108842 action_freq [224.  24.] /e 0.096 /pscore 6 /loss 0.10675025 / Q_MAX -1.969006e-01
ts 108966 action_freq [105.  19.] /e 0.096 /pscore 2 /loss 0.25154135 / Q_MAX -6.622179e-01
ts 10

ts 116708 action_freq [24. 26.] /e 0.096 /pscore 0 /loss 0.21870969 / Q_MAX 3.784125e-01
ts 116842 action_freq [115.  19.] /e 0.096 /pscore 2 /loss 0.08852443 / Q_MAX 1.220250e+00
ts 117049 action_freq [181.  26.] /e 0.096 /pscore 4 /loss 0.15256494 / Q_MAX 9.701981e+00
ts 117183 action_freq [122.  12.] /e 0.096 /pscore 2 /loss 0.122942805 / Q_MAX 3.811840e-01
ts 117240 action_freq [49.  8.] /e 0.096 /pscore 0 /loss 0.25709015 / Q_MAX 2.596861e+00
ts 117290 action_freq [24. 26.] /e 0.096 /pscore 0 /loss 0.20906933 / Q_MAX 2.664205e-02
ts 117340 action_freq [30. 20.] /e 0.096 /pscore 0 /loss 0.13934055 / Q_MAX -9.866257e-01
ts 117427 action_freq [69. 18.] /e 0.096 /pscore 1 /loss 0.042161517 / Q_MAX 1.209728e+01
ts 117477 action_freq [38. 12.] /e 0.096 /pscore 0 /loss 1.8890291 / Q_MAX 7.528441e+00
ts 117601 action_freq [107.  17.] /e 0.096 /pscore 2 /loss 0.580279 / Q_MAX -3.677072e+00
ts 117651 action_freq [33. 17.] /e 0.096 /pscore 0 /loss 0.35810882 / Q_MAX -1.925097e+00
ts 117701 a

ts 125289 action_freq [86. 13.] /e 0.096 /pscore 2 /loss 0.14350599 / Q_MAX -3.193830e+00
ts 125339 action_freq [38. 12.] /e 0.096 /pscore 0 /loss 0.3848085 / Q_MAX -7.917682e-01
ts 125389 action_freq [37. 13.] /e 0.096 /pscore 0 /loss 0.11398474 / Q_MAX -7.040263e-01
ts 125492 action_freq [83. 20.] /e 0.096 /pscore 2 /loss 0.16836455 / Q_MAX 2.498799e+00
ts 125544 action_freq [39. 13.] /e 0.096 /pscore 0 /loss 0.24086326 / Q_MAX -1.686416e+00
ts 125630 action_freq [69. 17.] /e 0.096 /pscore 1 /loss 0.17842601 / Q_MAX -4.161051e-01
ts 125688 action_freq [47. 11.] /e 0.096 /pscore 0 /loss 0.2109301 / Q_MAX -1.959155e-01
ts 125922 action_freq [213.  21.] /e 0.096 /pscore 5 /loss 0.2563241 / Q_MAX -1.596978e+00
ts 125972 action_freq [32. 18.] /e 0.096 /pscore 0 /loss 0.24470943 / Q_MAX -1.464833e+00
ts 126022 action_freq [37. 13.] /e 0.096 /pscore 0 /loss 1.113123 / Q_MAX -2.066455e+00
ts 126109 action_freq [69. 18.] /e 0.096 /pscore 1 /loss 0.093421586 / Q_MAX -3.753681e+00
ts 126173 act

ts 133086 action_freq [37. 13.] /e 0.096 /pscore 0 /loss 0.0802328 / Q_MAX -6.927221e-01
ts 133224 action_freq [121.  17.] /e 0.096 /pscore 3 /loss 1.6993927 / Q_MAX 2.219022e-01
ts 133274 action_freq [38. 12.] /e 0.096 /pscore 0 /loss 0.10921383 / Q_MAX 7.237761e+00
ts 133562 action_freq [256.  32.] /e 0.096 /pscore 7 /loss 0.019769737 / Q_MAX 3.912760e+00
ts 133612 action_freq [42.  8.] /e 0.096 /pscore 0 /loss 0.08417025 / Q_MAX -6.221894e-01
ts 133662 action_freq [26. 24.] /e 0.096 /pscore 0 /loss 0.07275182 / Q_MAX -5.210619e-01
ts 133727 action_freq [47. 18.] /e 0.096 /pscore 1 /loss 0.14499888 / Q_MAX 1.010274e+01
ts 133790 action_freq [46. 17.] /e 0.096 /pscore 1 /loss 0.46133184 / Q_MAX -5.864255e-01
ts 134024 action_freq [207.  27.] /e 0.096 /pscore 5 /loss 4.1195784 / Q_MAX -9.500362e-01
ts 134189 action_freq [143.  22.] /e 0.096 /pscore 3 /loss 0.6720803 / Q_MAX 5.540677e+00
ts 134276 action_freq [76. 11.] /e 0.096 /pscore 1 /loss 0.11348993 / Q_MAX 1.155727e-01
ts 134326 a

ts 140797 action_freq [43.  7.] /e 0.095 /pscore 0 /loss 0.13996573 / Q_MAX -7.310362e-01
ts 140920 action_freq [111.  12.] /e 0.095 /pscore 2 /loss 0.12471245 / Q_MAX -9.685329e-01
ts 141043 action_freq [113.  10.] /e 0.095 /pscore 2 /loss 0.17172125 / Q_MAX -8.263592e-01
ts 141137 action_freq [78. 16.] /e 0.095 /pscore 1 /loss 0.40726188 / Q_MAX 2.550719e+00
ts 141201 action_freq [46. 18.] /e 0.095 /pscore 1 /loss 0.3151304 / Q_MAX -6.574856e-01
ts 141301 action_freq [77. 23.] /e 0.095 /pscore 2 /loss 0.5140089 / Q_MAX 2.054240e+00
ts 141475 action_freq [144.  30.] /e 0.095 /pscore 4 /loss 0.29148903 / Q_MAX 1.185312e+00
ts 141608 action_freq [113.  20.] /e 0.095 /pscore 2 /loss 0.14804533 / Q_MAX 1.638137e+00
ts 141712 action_freq [81. 23.] /e 0.095 /pscore 2 /loss 0.15051274 / Q_MAX 1.455245e+00
ts 141836 action_freq [90. 34.] /e 0.095 /pscore 2 /loss 0.14818682 / Q_MAX 3.979611e-01
ts 141886 action_freq [36. 14.] /e 0.095 /pscore 0 /loss 0.47701266 / Q_MAX -4.079252e-01
ts 141936 

ts 148405 action_freq [29. 21.] /e 0.095 /pscore 0 /loss 0.10473778 / Q_MAX -8.166413e-01
ts 148500 action_freq [80. 15.] /e 0.095 /pscore 1 /loss 0.10196118 / Q_MAX -8.285809e-01
ts 148781 action_freq [249.  32.] /e 0.095 /pscore 6 /loss 0.5479137 / Q_MAX -2.447000e-01
ts 148831 action_freq [35. 15.] /e 0.095 /pscore 0 /loss 0.32631055 / Q_MAX -3.008983e-02
ts 148881 action_freq [43.  7.] /e 0.095 /pscore 0 /loss 0.43602252 / Q_MAX -7.875866e-01
ts 148967 action_freq [74. 12.] /e 0.095 /pscore 1 /loss 0.2069822 / Q_MAX -5.263829e-01
ts 149017 action_freq [18. 32.] /e 0.095 /pscore 0 /loss 0.13185519 / Q_MAX -1.124483e+00
ts 149067 action_freq [25. 25.] /e 0.095 /pscore 0 /loss 0.47824633 / Q_MAX -9.950882e-01
ts 149265 action_freq [169.  29.] /e 0.095 /pscore 4 /loss 0.0777417 / Q_MAX 8.145238e+00
ts 149315 action_freq [42.  8.] /e 0.095 /pscore 0 /loss 0.20114541 / Q_MAX -8.269442e-01
ts 149413 action_freq [85. 13.] /e 0.095 /pscore 2 /loss 0.1678759 / Q_MAX -1.754691e+00
ts 149463 a

ts 157264 action_freq [37. 13.] /e 0.095 /pscore 0 /loss 0.21533504 / Q_MAX -1.168703e+00
ts 157368 action_freq [89. 15.] /e 0.095 /pscore 2 /loss 0.17289495 / Q_MAX 1.396117e+01
ts 157418 action_freq [42.  8.] /e 0.095 /pscore 0 /loss 0.20422766 / Q_MAX 3.617033e+00
ts 157468 action_freq [31. 19.] /e 0.095 /pscore 0 /loss 0.19192441 / Q_MAX -1.333115e+00
ts 157518 action_freq [24. 26.] /e 0.095 /pscore 0 /loss 0.4816379 / Q_MAX -9.833602e-01
ts 157613 action_freq [84. 11.] /e 0.095 /pscore 1 /loss 0.5944338 / Q_MAX -1.318632e+00
ts 157858 action_freq [226.  19.] /e 0.095 /pscore 5 /loss 0.08327518 / Q_MAX 5.628926e-01
ts 157908 action_freq [31. 19.] /e 0.095 /pscore 0 /loss 0.3949962 / Q_MAX -1.002765e+00
ts 157958 action_freq [42.  8.] /e 0.095 /pscore 0 /loss 0.3512138 / Q_MAX 7.177085e-01
ts 158063 action_freq [90. 15.] /e 0.095 /pscore 2 /loss 0.18132669 / Q_MAX 9.925921e+00
ts 158159 action_freq [84. 12.] /e 0.095 /pscore 1 /loss 0.059042268 / Q_MAX -1.541530e+00
ts 158319 action

ts 165785 action_freq [35. 15.] /e 0.095 /pscore 0 /loss 0.14345133 / Q_MAX -7.245240e-01
ts 165845 action_freq [53.  7.] /e 0.095 /pscore 0 /loss 0.092478976 / Q_MAX -2.359640e+00
ts 166079 action_freq [209.  25.] /e 0.095 /pscore 5 /loss 0.10319196 / Q_MAX 2.716975e-01
ts 166174 action_freq [84. 11.] /e 0.094 /pscore 1 /loss 0.12841356 / Q_MAX 8.137465e-01
ts 166224 action_freq [39. 11.] /e 0.094 /pscore 0 /loss 0.36912936 / Q_MAX 1.730173e+00
ts 166364 action_freq [119.  21.] /e 0.094 /pscore 3 /loss 0.112689815 / Q_MAX 3.884913e-01
ts 166414 action_freq [33. 17.] /e 0.094 /pscore 0 /loss 0.13476153 / Q_MAX -7.729309e-01
ts 166464 action_freq [31. 19.] /e 0.094 /pscore 0 /loss 0.14835303 / Q_MAX -3.811699e-02
ts 166514 action_freq [11. 39.] /e 0.094 /pscore 0 /loss 0.058299076 / Q_MAX -1.050153e+00
ts 166610 action_freq [76. 20.] /e 0.094 /pscore 1 /loss 3.7872972 / Q_MAX 5.113462e-01
ts 166770 action_freq [129.  31.] /e 0.094 /pscore 3 /loss 0.080137946 / Q_MAX -1.306483e+00
ts 166

ts 174190 action_freq [174.  24.] /e 0.094 /pscore 4 /loss 0.8333396 / Q_MAX -2.762669e-01
ts 174240 action_freq [22. 28.] /e 0.094 /pscore 0 /loss 0.12196776 / Q_MAX -3.199862e-02
ts 174290 action_freq [37. 13.] /e 0.094 /pscore 0 /loss 0.20714657 / Q_MAX 1.079248e+01
ts 174340 action_freq [19. 31.] /e 0.094 /pscore 0 /loss 0.4326296 / Q_MAX -3.357883e+00
ts 174390 action_freq [14. 36.] /e 0.094 /pscore 0 /loss 0.24281426 / Q_MAX -7.629660e-01
ts 174442 action_freq [39. 13.] /e 0.094 /pscore 0 /loss 0.12690881 / Q_MAX 1.473674e+01
ts 174501 action_freq [50.  9.] /e 0.094 /pscore 0 /loss 0.17843573 / Q_MAX 1.831702e+00
ts 174551 action_freq [27. 23.] /e 0.094 /pscore 0 /loss 0.041108306 / Q_MAX -1.088908e+00
ts 174601 action_freq [39. 11.] /e 0.094 /pscore 0 /loss 0.10695213 / Q_MAX -1.455559e+00
ts 174687 action_freq [71. 15.] /e 0.094 /pscore 1 /loss 0.068490185 / Q_MAX -3.511122e+00
ts 174737 action_freq [39. 11.] /e 0.094 /pscore 0 /loss 0.27914727 / Q_MAX -6.925298e-01
ts 174896 a

ts 182061 action_freq [41.  9.] /e 0.094 /pscore 0 /loss 0.5191624 / Q_MAX -1.021742e+00
ts 182369 action_freq [280.  28.] /e 0.094 /pscore 7 /loss 0.10315777 / Q_MAX -1.627775e+00
ts 182419 action_freq [29. 21.] /e 0.094 /pscore 0 /loss 0.13122177 / Q_MAX -8.587669e-01
ts 182590 action_freq [147.  24.] /e 0.094 /pscore 3 /loss 0.090769716 / Q_MAX 6.277707e-02
ts 182640 action_freq [40. 10.] /e 0.094 /pscore 0 /loss 0.23034656 / Q_MAX -2.403922e-01
ts 182691 action_freq [47.  4.] /e 0.094 /pscore 0 /loss 0.2174744 / Q_MAX 4.352362e-01
ts 182863 action_freq [153.  19.] /e 0.094 /pscore 4 /loss 0.06644062 / Q_MAX 1.494542e+01
ts 182913 action_freq [36. 14.] /e 0.094 /pscore 0 /loss 0.4304128 / Q_MAX -9.133220e-01
ts 182963 action_freq [30. 20.] /e 0.094 /pscore 0 /loss 0.13537702 / Q_MAX 1.705861e-01
ts 183049 action_freq [61. 25.] /e 0.094 /pscore 1 /loss 0.14717379 / Q_MAX -8.044267e-01
ts 183108 action_freq [39. 20.] /e 0.094 /pscore 0 /loss 0.16508768 / Q_MAX 1.486950e-01
ts 183233 a

ts 191855 action_freq [198.  36.] /e 0.094 /pscore 5 /loss 0.41359943 / Q_MAX -1.131886e+00
ts 191905 action_freq [40. 10.] /e 0.094 /pscore 0 /loss 0.42663354 / Q_MAX -1.481437e+00
ts 192111 action_freq [188.  18.] /e 0.094 /pscore 4 /loss 0.24563462 / Q_MAX -2.709720e-01
ts 192161 action_freq [36. 14.] /e 0.094 /pscore 0 /loss 0.3463634 / Q_MAX -1.278134e+00
ts 192247 action_freq [71. 15.] /e 0.094 /pscore 1 /loss 0.08579372 / Q_MAX 5.665910e-01
ts 192297 action_freq [36. 14.] /e 0.094 /pscore 0 /loss 0.45943156 / Q_MAX 1.002620e+01
ts 192643 action_freq [314.  32.] /e 0.094 /pscore 8 /loss 0.28024012 / Q_MAX -9.351166e-01
ts 192693 action_freq [42.  8.] /e 0.094 /pscore 0 /loss 0.22942664 / Q_MAX 4.377985e-02
ts 192743 action_freq [39. 11.] /e 0.094 /pscore 0 /loss 0.0968078 / Q_MAX -7.796457e-01
ts 192884 action_freq [127.  14.] /e 0.094 /pscore 3 /loss 0.099063 / Q_MAX 2.860907e+00
ts 192934 action_freq [31. 19.] /e 0.094 /pscore 0 /loss 0.09422737 / Q_MAX -6.795934e-01
ts 193094 

ts 200739 action_freq [14. 36.] /e 0.093 /pscore 0 /loss 0.73353165 / Q_MAX -7.975659e-01
ts 200826 action_freq [65. 22.] /e 0.093 /pscore 1 /loss 0.32916817 / Q_MAX -1.144925e-02
ts 200876 action_freq [38. 12.] /e 0.093 /pscore 0 /loss 1.3061122 / Q_MAX -9.941391e-01
ts 201000 action_freq [108.  16.] /e 0.093 /pscore 2 /loss 0.075892776 / Q_MAX -1.168764e+00
ts 201095 action_freq [88.  7.] /e 0.093 /pscore 1 /loss 0.6188439 / Q_MAX 2.468503e+00
ts 201255 action_freq [140.  20.] /e 0.093 /pscore 3 /loss 0.089059554 / Q_MAX -6.092560e-01
ts 201305 action_freq [34. 16.] /e 0.093 /pscore 0 /loss 0.5127926 / Q_MAX -1.114804e+00
ts 201398 action_freq [74. 19.] /e 0.093 /pscore 1 /loss 0.16576405 / Q_MAX -2.356115e+00
ts 201485 action_freq [66. 21.] /e 0.093 /pscore 1 /loss 0.13358971 / Q_MAX -8.864176e-01
ts 201644 action_freq [136.  23.] /e 0.093 /pscore 3 /loss 0.224274 / Q_MAX -3.561952e-01
ts 201779 action_freq [108.  27.] /e 0.093 /pscore 2 /loss 0.10719384 / Q_MAX -2.110717e+00
ts 201

ts 208831 action_freq [40. 10.] /e 0.093 /pscore 0 /loss 0.12015922 / Q_MAX -6.848641e-01
ts 208929 action_freq [87. 11.] /e 0.093 /pscore 2 /loss 0.096933864 / Q_MAX -2.305440e+00
ts 209067 action_freq [122.  16.] /e 0.093 /pscore 3 /loss 0.13258195 / Q_MAX 1.367317e+01
ts 209117 action_freq [33. 17.] /e 0.093 /pscore 0 /loss 0.256384 / Q_MAX -4.760550e-01
ts 209169 action_freq [42. 10.] /e 0.093 /pscore 0 /loss 0.07487835 / Q_MAX -1.302910e+00
ts 209338 action_freq [147.  22.] /e 0.093 /pscore 3 /loss 0.5869818 / Q_MAX 9.049220e+00
ts 209424 action_freq [73. 13.] /e 0.093 /pscore 1 /loss 0.33586428 / Q_MAX -3.736344e-01
ts 209510 action_freq [68. 18.] /e 0.093 /pscore 1 /loss 0.3571192 / Q_MAX 1.783662e-02
ts 209560 action_freq [32. 18.] /e 0.093 /pscore 0 /loss 0.18448913 / Q_MAX -1.150165e+00
ts 209628 action_freq [49. 19.] /e 0.093 /pscore 1 /loss 0.12889834 / Q_MAX 1.651969e+01
ts 209678 action_freq [31. 19.] /e 0.093 /pscore 0 /loss 0.21169135 / Q_MAX -7.028197e-01
ts 209728 act

ts 218145 action_freq [70. 16.] /e 0.093 /pscore 1 /loss 0.24199335 / Q_MAX -9.609984e-01
ts 218195 action_freq [34. 16.] /e 0.093 /pscore 0 /loss 0.22676359 / Q_MAX -7.511134e-01
ts 218541 action_freq [302.  44.] /e 0.093 /pscore 8 /loss 0.075381905 / Q_MAX -1.090544e-02
ts 218633 action_freq [83.  9.] /e 0.093 /pscore 1 /loss 0.15445739 / Q_MAX 6.167352e-01
ts 218692 action_freq [48. 11.] /e 0.093 /pscore 0 /loss 0.9415184 / Q_MAX 8.361895e+00
ts 218742 action_freq [33. 17.] /e 0.093 /pscore 0 /loss 0.039098296 / Q_MAX -3.462192e-01
ts 218806 action_freq [47. 17.] /e 0.093 /pscore 1 /loss 0.11754494 / Q_MAX 4.576284e-01
ts 218865 action_freq [46. 13.] /e 0.093 /pscore 0 /loss 0.10411466 / Q_MAX -9.182203e-01
ts 219030 action_freq [143.  22.] /e 0.093 /pscore 3 /loss 0.18793955 / Q_MAX 2.456137e+00
ts 219080 action_freq [34. 16.] /e 0.093 /pscore 0 /loss 0.18270011 / Q_MAX -8.558388e-01
ts 219130 action_freq [20. 30.] /e 0.093 /pscore 0 /loss 0.18019623 / Q_MAX -7.053542e-01
ts 219180

ts 226118 action_freq [75. 14.] /e 0.093 /pscore 1 /loss 0.08353146 / Q_MAX -5.420573e-01
ts 226181 action_freq [49. 14.] /e 0.093 /pscore 1 /loss 0.06697961 / Q_MAX -4.507183e-01
ts 226232 action_freq [40. 11.] /e 0.092 /pscore 0 /loss 0.36392492 / Q_MAX -2.935239e-01
ts 226477 action_freq [213.  32.] /e 0.092 /pscore 5 /loss 0.08316834 / Q_MAX -6.875199e-01
ts 226527 action_freq [41.  9.] /e 0.092 /pscore 0 /loss 0.05904582 / Q_MAX -7.156866e-01
ts 226594 action_freq [55. 12.] /e 0.092 /pscore 1 /loss 0.04044212 / Q_MAX -8.658079e-01
ts 226660 action_freq [49. 17.] /e 0.092 /pscore 1 /loss 0.47686785 / Q_MAX -1.476459e+00
ts 226726 action_freq [56. 10.] /e 0.092 /pscore 1 /loss 0.075720966 / Q_MAX 1.006852e+01
ts 226855 action_freq [116.  13.] /e 0.092 /pscore 2 /loss 0.3462882 / Q_MAX -2.378520e+00
ts 226905 action_freq [42.  8.] /e 0.092 /pscore 0 /loss 0.09097122 / Q_MAX -2.569552e+00
ts 226955 action_freq [40. 10.] /e 0.092 /pscore 0 /loss 0.06535467 / Q_MAX 6.406761e+00
ts 22707

ts 234158 action_freq [38. 12.] /e 0.092 /pscore 0 /loss 0.23987113 / Q_MAX -5.774415e-01
ts 234208 action_freq [44.  6.] /e 0.092 /pscore 0 /loss 0.17732029 / Q_MAX -2.081357e-01
ts 234258 action_freq [41.  9.] /e 0.092 /pscore 0 /loss 0.15593712 / Q_MAX -4.967117e-01
ts 234308 action_freq [40. 10.] /e 0.092 /pscore 0 /loss 0.053898245 / Q_MAX -6.109025e-01
ts 234439 action_freq [118.  13.] /e 0.092 /pscore 2 /loss 0.15529522 / Q_MAX 2.258163e-01
ts 234637 action_freq [166.  32.] /e 0.092 /pscore 4 /loss 0.12720698 / Q_MAX -6.051403e-01
ts 234687 action_freq [31. 19.] /e 0.092 /pscore 0 /loss 0.15740159 / Q_MAX -1.297699e+00
ts 234737 action_freq [31. 19.] /e 0.092 /pscore 0 /loss 0.3016549 / Q_MAX -1.296725e+00
ts 234794 action_freq [46. 11.] /e 0.092 /pscore 0 /loss 0.026723113 / Q_MAX -1.815942e+00
ts 234854 action_freq [49. 11.] /e 0.092 /pscore 0 /loss 0.09616748 / Q_MAX -2.256201e+00
ts 234943 action_freq [78. 11.] /e 0.092 /pscore 1 /loss 0.564698 / Q_MAX 8.381815e-01
ts 235157

ts 242432 action_freq [41.  9.] /e 0.092 /pscore 0 /loss 0.09193909 / Q_MAX 1.699132e+00
ts 242482 action_freq [37. 13.] /e 0.092 /pscore 0 /loss 0.07827664 / Q_MAX 2.703769e+00
ts 242538 action_freq [41. 15.] /e 0.092 /pscore 0 /loss 0.146319 / Q_MAX 6.242632e-01
ts 242929 action_freq [345.  46.] /e 0.092 /pscore 9 /loss 0.16868956 / Q_MAX -1.154134e+00
ts 243089 action_freq [141.  19.] /e 0.092 /pscore 3 /loss 0.15659629 / Q_MAX -1.566665e+00
ts 243142 action_freq [48.  5.] /e 0.092 /pscore 0 /loss 0.25543502 / Q_MAX 1.481492e+01
ts 243198 action_freq [51.  5.] /e 0.092 /pscore 0 /loss 0.14484838 / Q_MAX 2.555595e+00
ts 243395 action_freq [178.  19.] /e 0.092 /pscore 4 /loss 1.7477965 / Q_MAX -3.281702e-01
ts 243445 action_freq [35. 15.] /e 0.092 /pscore 0 /loss 0.3577394 / Q_MAX -9.626532e-01
ts 243548 action_freq [87. 16.] /e 0.092 /pscore 2 /loss 0.2886259 / Q_MAX 1.575646e+01
ts 243745 action_freq [179.  18.] /e 0.092 /pscore 4 /loss 0.060493484 / Q_MAX -8.764764e-01
ts 243832 ac

ts 251054 action_freq [26. 24.] /e 0.092 /pscore 0 /loss 0.2529478 / Q_MAX -1.173659e+00
ts 251105 action_freq [38. 13.] /e 0.092 /pscore 0 /loss 0.119869485 / Q_MAX 6.195430e+00
ts 251155 action_freq [37. 13.] /e 0.092 /pscore 0 /loss 0.10975621 / Q_MAX -1.350374e+00
ts 251206 action_freq [38. 13.] /e 0.092 /pscore 0 /loss 0.17970261 / Q_MAX -1.444690e+00
ts 251256 action_freq [18. 32.] /e 0.092 /pscore 0 /loss 2.0824282 / Q_MAX -6.677803e-01
ts 251342 action_freq [67. 19.] /e 0.092 /pscore 1 /loss 0.12799108 / Q_MAX -7.430420e-01
ts 251428 action_freq [70. 16.] /e 0.092 /pscore 1 /loss 0.41600925 / Q_MAX 7.844800e+00
ts 251746 action_freq [280.  38.] /e 0.092 /pscore 7 /loss 0.28896114 / Q_MAX -2.320169e+00
ts 251797 action_freq [43.  8.] /e 0.092 /pscore 0 /loss 0.49822003 / Q_MAX -2.070363e+00
ts 251921 action_freq [98. 26.] /e 0.092 /pscore 2 /loss 0.17776866 / Q_MAX -1.263775e+00
ts 252155 action_freq [199.  35.] /e 0.092 /pscore 5 /loss 0.20162827 / Q_MAX -2.595583e+00
ts 252205

ts 259068 action_freq [62. 24.] /e 0.091 /pscore 1 /loss 0.16308634 / Q_MAX -6.192176e-01
ts 259118 action_freq [19. 31.] /e 0.091 /pscore 0 /loss 0.10083002 / Q_MAX -3.959414e-01
ts 259206 action_freq [71. 17.] /e 0.091 /pscore 1 /loss 0.3292141 / Q_MAX 1.174308e+00
ts 259256 action_freq [34. 16.] /e 0.091 /pscore 0 /loss 0.10659726 / Q_MAX -2.642173e-01
ts 259342 action_freq [66. 20.] /e 0.091 /pscore 1 /loss 0.3372824 / Q_MAX -6.140354e-01
ts 259470 action_freq [102.  26.] /e 0.091 /pscore 2 /loss 0.18202251 / Q_MAX 5.395659e-01
ts 259593 action_freq [103.  20.] /e 0.091 /pscore 2 /loss 0.120908156 / Q_MAX -7.721583e-01
ts 259643 action_freq [23. 27.] /e 0.091 /pscore 0 /loss 0.15912762 / Q_MAX -5.378291e-01
ts 259767 action_freq [107.  17.] /e 0.091 /pscore 2 /loss 0.07436384 / Q_MAX -6.254541e-01
ts 259853 action_freq [71. 15.] /e 0.091 /pscore 1 /loss 0.140151 / Q_MAX -8.256655e-01
ts 259945 action_freq [83.  9.] /e 0.091 /pscore 1 /loss 0.46701333 / Q_MAX -1.816810e+00
ts 260031

ts 266783 action_freq [26. 24.] /e 0.091 /pscore 0 /loss 0.08906078 / Q_MAX -4.024104e-01
ts 266833 action_freq [37. 13.] /e 0.091 /pscore 0 /loss 0.12341291 / Q_MAX 2.778131e-02
ts 266883 action_freq [29. 21.] /e 0.091 /pscore 0 /loss 0.37716907 / Q_MAX -8.582579e-01
ts 266933 action_freq [20. 30.] /e 0.091 /pscore 0 /loss 0.030198518 / Q_MAX -4.887996e-01
ts 267037 action_freq [96.  8.] /e 0.091 /pscore 2 /loss 0.062401082 / Q_MAX 3.324205e+00
ts 267160 action_freq [104.  19.] /e 0.091 /pscore 2 /loss 0.27257007 / Q_MAX -3.697342e-01
ts 267210 action_freq [38. 12.] /e 0.091 /pscore 0 /loss 0.46224862 / Q_MAX -9.428142e-01
ts 267338 action_freq [110.  18.] /e 0.091 /pscore 2 /loss 0.07601822 / Q_MAX -2.893968e-01
ts 267497 action_freq [142.  17.] /e 0.091 /pscore 3 /loss 0.15054217 / Q_MAX 4.955426e+00
ts 267547 action_freq [24. 26.] /e 0.091 /pscore 0 /loss 0.1536993 / Q_MAX -2.545761e-01
ts 267633 action_freq [62. 24.] /e 0.091 /pscore 1 /loss 0.021561624 / Q_MAX -2.923046e-01
ts 26

ts 274731 action_freq [33. 17.] /e 0.091 /pscore 0 /loss 0.18912981 / Q_MAX -1.573206e+00
ts 274781 action_freq [27. 23.] /e 0.091 /pscore 0 /loss 0.12790865 / Q_MAX -1.109348e+00
ts 274831 action_freq [35. 15.] /e 0.091 /pscore 0 /loss 0.1719432 / Q_MAX -8.948653e-01
ts 275029 action_freq [173.  25.] /e 0.091 /pscore 4 /loss 0.124149226 / Q_MAX -5.107757e-01
ts 275204 action_freq [144.  31.] /e 0.091 /pscore 4 /loss 0.12944743 / Q_MAX -8.946171e-01
ts 275254 action_freq [32. 18.] /e 0.091 /pscore 0 /loss 0.07933432 / Q_MAX -9.854105e-01
ts 275304 action_freq [34. 16.] /e 0.091 /pscore 0 /loss 0.067979455 / Q_MAX -8.511292e-01
ts 275390 action_freq [63. 23.] /e 0.091 /pscore 1 /loss 0.058964577 / Q_MAX -3.662161e-01
ts 275513 action_freq [108.  15.] /e 0.091 /pscore 2 /loss 0.1492421 / Q_MAX -2.879450e-01
ts 275564 action_freq [39. 12.] /e 0.091 /pscore 0 /loss 0.08189155 / Q_MAX -3.245067e-01
ts 275797 action_freq [206.  27.] /e 0.091 /pscore 5 /loss 0.061028466 / Q_MAX -5.525638e-01


ts 283320 action_freq [34. 16.] /e 0.091 /pscore 0 /loss 0.075595304 / Q_MAX -6.935227e-01
ts 283370 action_freq [23. 27.] /e 0.091 /pscore 0 /loss 0.034125954 / Q_MAX -1.486076e+00
ts 283420 action_freq [36. 14.] /e 0.091 /pscore 0 /loss 0.43153325 / Q_MAX -1.207540e+00
ts 283558 action_freq [112.  26.] /e 0.091 /pscore 3 /loss 0.24947815 / Q_MAX 1.283472e+00
ts 283621 action_freq [49. 14.] /e 0.091 /pscore 1 /loss 0.13300212 / Q_MAX -1.001427e+00
ts 283671 action_freq [37. 13.] /e 0.091 /pscore 0 /loss 0.18397236 / Q_MAX -2.845749e+00
ts 283757 action_freq [59. 27.] /e 0.091 /pscore 1 /loss 0.071817294 / Q_MAX -7.331006e-01
ts 283807 action_freq [24. 26.] /e 0.091 /pscore 0 /loss 1.3370811 / Q_MAX -2.288383e+00
ts 283867 action_freq [43. 17.] /e 0.091 /pscore 0 /loss 0.16960467 / Q_MAX -1.014190e+00
ts 283917 action_freq [32. 18.] /e 0.091 /pscore 0 /loss 0.051061004 / Q_MAX -9.384104e-01
ts 284003 action_freq [64. 22.] /e 0.091 /pscore 1 /loss 0.046071563 / Q_MAX -9.117515e-01
ts 28

ts 291860 action_freq [33. 17.] /e 0.09 /pscore 0 /loss 0.19632256 / Q_MAX 4.614049e-02
ts 291910 action_freq [26. 24.] /e 0.09 /pscore 0 /loss 0.09432341 / Q_MAX 4.304439e-01
ts 291996 action_freq [70. 16.] /e 0.09 /pscore 1 /loss 0.10769595 / Q_MAX 2.505379e-01
ts 292120 action_freq [103.  21.] /e 0.09 /pscore 2 /loss 0.040293455 / Q_MAX 2.148658e-01
ts 292187 action_freq [53. 14.] /e 0.09 /pscore 1 /loss 0.043205447 / Q_MAX 6.346759e+00
ts 292273 action_freq [61. 25.] /e 0.09 /pscore 1 /loss 0.09897667 / Q_MAX 6.906027e-01
ts 292432 action_freq [135.  24.] /e 0.09 /pscore 3 /loss 0.06368579 / Q_MAX -1.298428e-01
ts 292518 action_freq [62. 24.] /e 0.09 /pscore 1 /loss 1.6349486 / Q_MAX -8.418663e-01
ts 292611 action_freq [73. 20.] /e 0.09 /pscore 1 /loss 0.7088086 / Q_MAX -2.133680e-01
ts 292676 action_freq [52. 13.] /e 0.09 /pscore 1 /loss 0.10407752 / Q_MAX -3.209909e-01
ts 292726 action_freq [30. 20.] /e 0.09 /pscore 0 /loss 0.14529888 / Q_MAX -7.108427e-01
ts 292786 action_freq [

ts 299934 action_freq [37. 13.] /e 0.09 /pscore 0 /loss 0.10622038 / Q_MAX -1.475308e+00
ts 300101 action_freq [150.  17.] /e 0.09 /pscore 3 /loss 0.076895356 / Q_MAX 2.562882e-01
ts 300170 action_freq [62.  7.] /e 0.09 /pscore 1 /loss 0.42037436 / Q_MAX 7.703831e-01
ts 300220 action_freq [40. 10.] /e 0.09 /pscore 0 /loss 0.021566447 / Q_MAX 2.041090e-02
ts 300273 action_freq [41. 12.] /e 0.09 /pscore 0 /loss 0.09713575 / Q_MAX -4.419211e-01
ts 300323 action_freq [35. 15.] /e 0.09 /pscore 0 /loss 0.1960953 / Q_MAX 1.018891e-01
ts 300373 action_freq [33. 17.] /e 0.09 /pscore 0 /loss 0.074485146 / Q_MAX -5.935065e-01
ts 300426 action_freq [37. 16.] /e 0.09 /pscore 0 /loss 0.09147991 / Q_MAX -8.569878e-02
ts 300476 action_freq [27. 23.] /e 0.09 /pscore 0 /loss 0.31723285 / Q_MAX 1.547279e+00
ts 300526 action_freq [26. 24.] /e 0.09 /pscore 0 /loss 0.18828784 / Q_MAX -4.706402e-01
ts 300576 action_freq [35. 15.] /e 0.09 /pscore 0 /loss 0.18361142 / Q_MAX -4.689378e-01
ts 300635 action_freq 

ts 307537 action_freq [26. 24.] /e 0.09 /pscore 0 /loss 0.15952224 / Q_MAX -6.993084e-01
ts 307660 action_freq [100.  23.] /e 0.09 /pscore 2 /loss 0.12230052 / Q_MAX 3.392994e-03
ts 307900 action_freq [208.  32.] /e 0.09 /pscore 5 /loss 0.05995381 / Q_MAX -3.129953e-01
ts 307991 action_freq [73. 18.] /e 0.09 /pscore 1 /loss 0.31107357 / Q_MAX 1.636583e+01
ts 308050 action_freq [45. 14.] /e 0.09 /pscore 0 /loss 0.8559898 / Q_MAX -6.964175e-01
ts 308440 action_freq [348.  42.] /e 0.09 /pscore 9 /loss 0.3333615 / Q_MAX 1.026009e-01
ts 308490 action_freq [44.  6.] /e 0.09 /pscore 0 /loss 0.32552582 / Q_MAX -3.513412e-01
ts 308540 action_freq [41.  9.] /e 0.09 /pscore 0 /loss 0.43788677 / Q_MAX -4.696803e-01
ts 308737 action_freq [171.  26.] /e 0.09 /pscore 4 /loss 0.2466989 / Q_MAX -5.009012e-01
ts 308798 action_freq [50. 11.] /e 0.09 /pscore 0 /loss 0.06613784 / Q_MAX 3.107287e-01
ts 308859 action_freq [48. 13.] /e 0.09 /pscore 0 /loss 0.4292555 / Q_MAX -1.098644e+00
ts 308945 action_freq

ts 316283 action_freq [216.  31.] /e 0.09 /pscore 6 /loss 0.12483954 / Q_MAX 5.337209e-01
ts 316350 action_freq [42. 25.] /e 0.089 /pscore 1 /loss 0.056782432 / Q_MAX 6.446679e-01
ts 316400 action_freq [20. 30.] /e 0.089 /pscore 0 /loss 0.2139028 / Q_MAX -8.174432e-01
ts 316598 action_freq [167.  31.] /e 0.089 /pscore 4 /loss 0.22019562 / Q_MAX -1.707956e+00
ts 316688 action_freq [76. 14.] /e 0.089 /pscore 1 /loss 0.64056665 / Q_MAX -3.157718e-01
ts 316738 action_freq [40. 10.] /e 0.089 /pscore 0 /loss 0.081874505 / Q_MAX -1.475452e+00
ts 316788 action_freq [41.  9.] /e 0.089 /pscore 0 /loss 0.062320992 / Q_MAX -1.047993e+00
ts 316838 action_freq [38. 12.] /e 0.089 /pscore 0 /loss 0.10377143 / Q_MAX -8.507251e-01
ts 316965 action_freq [112.  15.] /e 0.089 /pscore 2 /loss 0.055915713 / Q_MAX 3.360063e-01
ts 317022 action_freq [45. 12.] /e 0.089 /pscore 0 /loss 0.15291467 / Q_MAX 4.471652e-01
ts 317079 action_freq [43. 14.] /e 0.089 /pscore 0 /loss 0.046216697 / Q_MAX -2.309941e-01
ts 31

ts 323397 action_freq [32. 18.] /e 0.089 /pscore 0 /loss 0.053627715 / Q_MAX -1.034310e+00
ts 323521 action_freq [101.  23.] /e 0.089 /pscore 2 /loss 0.21626982 / Q_MAX -1.696064e+00
ts 323571 action_freq [23. 27.] /e 0.089 /pscore 0 /loss 0.32479626 / Q_MAX -1.669107e+00
ts 324065 action_freq [443.  51.] /e 0.089 /pscore 12 /loss 0.15528992 / Q_MAX 5.359814e-02
ts 324123 action_freq [41. 17.] /e 0.089 /pscore 0 /loss 0.07655432 / Q_MAX -5.802571e-01
ts 324178 action_freq [39. 16.] /e 0.089 /pscore 0 /loss 0.2224403 / Q_MAX -8.438107e-02
ts 324314 action_freq [108.  28.] /e 0.089 /pscore 3 /loss 0.22330655 / Q_MAX -1.677236e+00
ts 324364 action_freq [40. 10.] /e 0.089 /pscore 0 /loss 0.45076483 / Q_MAX -1.020005e+00
ts 324427 action_freq [54.  9.] /e 0.089 /pscore 1 /loss 0.1435625 / Q_MAX 2.271643e-01
ts 324587 action_freq [128.  32.] /e 0.089 /pscore 3 /loss 0.052468956 / Q_MAX -1.113150e+00
ts 324637 action_freq [ 6. 44.] /e 0.089 /pscore 0 /loss 0.21096668 / Q_MAX -8.543533e-01
ts 

ts 332206 action_freq [18. 32.] /e 0.089 /pscore 0 /loss 0.13422975 / Q_MAX 6.265335e-02
ts 332662 action_freq [400.  56.] /e 0.089 /pscore 11 /loss 0.02707833 / Q_MAX -7.431006e-01
ts 332729 action_freq [62.  5.] /e 0.089 /pscore 1 /loss 0.21338238 / Q_MAX 2.486632e-01
ts 332889 action_freq [136.  24.] /e 0.089 /pscore 3 /loss 0.054937076 / Q_MAX -7.718166e-01
ts 333016 action_freq [112.  15.] /e 0.089 /pscore 2 /loss 0.088466175 / Q_MAX 8.220066e-01
ts 333070 action_freq [43. 11.] /e 0.089 /pscore 0 /loss 0.14802626 / Q_MAX -4.684797e-01
ts 333193 action_freq [103.  20.] /e 0.089 /pscore 2 /loss 0.277175 / Q_MAX -5.100950e-01
ts 333317 action_freq [103.  21.] /e 0.089 /pscore 2 /loss 0.32778654 / Q_MAX -9.348545e-01
ts 333515 action_freq [167.  31.] /e 0.089 /pscore 4 /loss 0.33446854 / Q_MAX -2.855764e-01
ts 333567 action_freq [43.  9.] /e 0.089 /pscore 0 /loss 0.093511075 / Q_MAX -3.374456e-01
ts 333624 action_freq [44. 13.] /e 0.089 /pscore 0 /loss 0.22670084 / Q_MAX -5.987836e-01

ts 341415 action_freq [79.  8.] /e 0.089 /pscore 1 /loss 0.54195356 / Q_MAX -7.329324e-01
ts 341557 action_freq [127.  15.] /e 0.089 /pscore 3 /loss 2.126427 / Q_MAX 1.496098e+01
ts 341607 action_freq [38. 12.] /e 0.089 /pscore 0 /loss 0.03468605 / Q_MAX -6.216712e-01
ts 341657 action_freq [27. 23.] /e 0.089 /pscore 0 /loss 0.3255094 / Q_MAX -9.859250e-01
ts 341707 action_freq [26. 24.] /e 0.089 /pscore 0 /loss 0.31343216 / Q_MAX -8.014742e-01
ts 341757 action_freq [31. 19.] /e 0.089 /pscore 0 /loss 0.1934358 / Q_MAX -8.644772e-01
ts 341807 action_freq [24. 26.] /e 0.089 /pscore 0 /loss 0.16856307 / Q_MAX -1.248024e+00
ts 341857 action_freq [17. 33.] /e 0.089 /pscore 0 /loss 0.13434033 / Q_MAX -1.134783e+00
ts 341907 action_freq [28. 22.] /e 0.089 /pscore 0 /loss 0.09523909 / Q_MAX -1.138246e+00
ts 342007 action_freq [80. 20.] /e 0.089 /pscore 2 /loss 0.08629737 / Q_MAX 1.249960e+01
ts 342057 action_freq [34. 16.] /e 0.089 /pscore 0 /loss 0.15459807 / Q_MAX -9.985254e-01
ts 342182 acti

ts 348425 action_freq [63. 25.] /e 0.088 /pscore 1 /loss 0.073422074 / Q_MAX -2.494611e+00
ts 348475 action_freq [28. 22.] /e 0.088 /pscore 0 /loss 1.2879666 / Q_MAX -1.606758e+00
ts 348525 action_freq [25. 25.] /e 0.088 /pscore 0 /loss 0.34030288 / Q_MAX -1.065802e+00
ts 348575 action_freq [25. 25.] /e 0.088 /pscore 0 /loss 0.64109933 / Q_MAX -1.113660e+00
ts 348626 action_freq [40. 11.] /e 0.088 /pscore 0 /loss 0.28421146 / Q_MAX -1.900833e-01
ts 348676 action_freq [37. 13.] /e 0.088 /pscore 0 /loss 0.66286963 / Q_MAX -1.783241e+00
ts 348836 action_freq [140.  20.] /e 0.088 /pscore 3 /loss 0.28847194 / Q_MAX -1.777611e+00
ts 348887 action_freq [46.  5.] /e 0.088 /pscore 0 /loss 0.30041584 / Q_MAX 1.202523e+01
ts 348983 action_freq [82. 14.] /e 0.088 /pscore 1 /loss 0.14501162 / Q_MAX 5.499825e-01
ts 349217 action_freq [215.  19.] /e 0.088 /pscore 5 /loss 0.36811274 / Q_MAX -7.067984e-01
ts 349377 action_freq [140.  20.] /e 0.088 /pscore 3 /loss 1.9733196 / Q_MAX 7.463546e-02
ts 34946

ts 356826 action_freq [70. 29.] /e 0.088 /pscore 2 /loss 0.11278036 / Q_MAX -6.854107e-01
ts 356912 action_freq [59. 27.] /e 0.088 /pscore 1 /loss 0.026784562 / Q_MAX -1.019344e+00
ts 356962 action_freq [29. 21.] /e 0.088 /pscore 0 /loss 0.13915308 / Q_MAX -1.040970e+00
ts 357018 action_freq [47.  9.] /e 0.088 /pscore 0 /loss 0.027541868 / Q_MAX 1.627977e+01
ts 357068 action_freq [43.  7.] /e 0.088 /pscore 0 /loss 0.05726291 / Q_MAX -8.803003e-01
ts 357118 action_freq [34. 16.] /e 0.088 /pscore 0 /loss 0.14032361 / Q_MAX -1.371794e+00
ts 357168 action_freq [40. 10.] /e 0.088 /pscore 0 /loss 0.11503874 / Q_MAX 1.440856e+00
ts 357339 action_freq [132.  39.] /e 0.088 /pscore 3 /loss 2.1274886 / Q_MAX 1.616020e+01
ts 357389 action_freq [27. 23.] /e 0.088 /pscore 0 /loss 0.26606137 / Q_MAX -6.002970e-01
ts 357455 action_freq [44. 22.] /e 0.088 /pscore 1 /loss 0.058531813 / Q_MAX -4.741405e-01
ts 357506 action_freq [38. 13.] /e 0.088 /pscore 0 /loss 0.66698956 / Q_MAX 7.980147e+00
ts 357556 

ts 364064 action_freq [26. 24.] /e 0.088 /pscore 0 /loss 0.11335778 / Q_MAX -5.976050e-01
ts 364114 action_freq [18. 32.] /e 0.088 /pscore 0 /loss 0.06489546 / Q_MAX -5.563676e-01
ts 364164 action_freq [19. 31.] /e 0.088 /pscore 0 /loss 0.036045715 / Q_MAX -1.021112e-01
ts 364222 action_freq [40. 18.] /e 0.088 /pscore 0 /loss 0.09479308 / Q_MAX 4.843571e+00
ts 364391 action_freq [142.  27.] /e 0.088 /pscore 3 /loss 0.53241074 / Q_MAX 4.582144e-01
ts 364453 action_freq [52. 10.] /e 0.088 /pscore 1 /loss 0.2382201 / Q_MAX -1.499808e+00
ts 364553 action_freq [84. 16.] /e 0.088 /pscore 2 /loss 0.1182965 / Q_MAX 1.577376e+01
ts 364605 action_freq [36. 16.] /e 0.088 /pscore 0 /loss 0.21749786 / Q_MAX 1.600765e+01
ts 364658 action_freq [41. 12.] /e 0.088 /pscore 0 /loss 0.08970483 / Q_MAX -9.615966e-01
ts 364708 action_freq [29. 21.] /e 0.088 /pscore 0 /loss 0.07644469 / Q_MAX -3.925435e-01
ts 364832 action_freq [90. 34.] /e 0.088 /pscore 2 /loss 0.047528423 / Q_MAX -7.010354e-01
ts 364882 ac

ts 371174 action_freq [23. 27.] /e 0.088 /pscore 0 /loss 0.21642266 / Q_MAX -5.825928e-01
ts 371333 action_freq [138.  21.] /e 0.088 /pscore 3 /loss 0.63485277 / Q_MAX -5.093523e-01
ts 371383 action_freq [41.  9.] /e 0.088 /pscore 0 /loss 0.09539987 / Q_MAX -6.837956e-01
ts 371433 action_freq [19. 31.] /e 0.088 /pscore 0 /loss 0.060369138 / Q_MAX -5.048637e-01
ts 371636 action_freq [167.  36.] /e 0.088 /pscore 4 /loss 0.20928118 / Q_MAX -8.353988e-01
ts 371686 action_freq [26. 24.] /e 0.088 /pscore 0 /loss 0.07830836 / Q_MAX -7.408322e-01
ts 371736 action_freq [23. 27.] /e 0.088 /pscore 0 /loss 0.27587092 / Q_MAX -7.671115e-01
ts 371788 action_freq [34. 18.] /e 0.088 /pscore 0 /loss 0.22812289 / Q_MAX 1.067611e+00
ts 371953 action_freq [132.  33.] /e 0.088 /pscore 3 /loss 0.06256382 / Q_MAX 3.496403e-01
ts 372003 action_freq [15. 35.] /e 0.088 /pscore 0 /loss 0.13170886 / Q_MAX -4.941774e-01
ts 372090 action_freq [62. 25.] /e 0.088 /pscore 1 /loss 0.039413106 / Q_MAX -7.862529e-01
ts 3

ts 378446 action_freq [27. 23.] /e 0.087 /pscore 0 /loss 0.38127112 / Q_MAX -4.026783e-01
ts 378496 action_freq [16. 34.] /e 0.087 /pscore 0 /loss 0.15808134 / Q_MAX -5.905085e-01
ts 378546 action_freq [10. 40.] /e 0.087 /pscore 0 /loss 0.5603875 / Q_MAX -8.497326e-01
ts 378605 action_freq [36. 23.] /e 0.087 /pscore 0 /loss 0.19978827 / Q_MAX -1.923507e-01
ts 378699 action_freq [73. 21.] /e 0.087 /pscore 1 /loss 0.86605346 / Q_MAX -8.191240e-01
ts 378824 action_freq [108.  17.] /e 0.087 /pscore 2 /loss 0.10513352 / Q_MAX -9.468735e-01
ts 379025 action_freq [174.  27.] /e 0.087 /pscore 4 /loss 0.09190212 / Q_MAX -1.030140e+00
ts 379126 action_freq [84. 17.] /e 0.087 /pscore 2 /loss 0.13842261 / Q_MAX -1.756889e+00
ts 379212 action_freq [71. 15.] /e 0.087 /pscore 1 /loss 0.145092 / Q_MAX -2.450840e+00
ts 379262 action_freq [29. 21.] /e 0.087 /pscore 0 /loss 0.32734272 / Q_MAX -1.045682e+00
ts 379348 action_freq [76. 10.] /e 0.087 /pscore 1 /loss 0.032829892 / Q_MAX -9.462878e-01
ts 37965

ts 386712 action_freq [24. 26.] /e 0.087 /pscore 0 /loss 0.31921586 / Q_MAX -1.020292e+00
ts 386762 action_freq [37. 13.] /e 0.087 /pscore 0 /loss 0.17023702 / Q_MAX -9.193459e-01
ts 386813 action_freq [46.  5.] /e 0.087 /pscore 0 /loss 0.27046055 / Q_MAX -3.530808e-01
ts 386906 action_freq [71. 22.] /e 0.087 /pscore 1 /loss 0.64311755 / Q_MAX -5.340833e-01
ts 386957 action_freq [34. 17.] /e 0.087 /pscore 0 /loss 0.1376748 / Q_MAX -1.753155e+00
ts 387007 action_freq [39. 11.] /e 0.087 /pscore 0 /loss 0.30757922 / Q_MAX -1.124266e+00
ts 387204 action_freq [178.  19.] /e 0.087 /pscore 4 /loss 0.75174755 / Q_MAX -6.331626e-01
ts 387440 action_freq [208.  28.] /e 0.087 /pscore 5 /loss 0.1000508 / Q_MAX 1.706912e+00
ts 387564 action_freq [93. 31.] /e 0.087 /pscore 2 /loss 0.20948222 / Q_MAX -6.661775e-01
ts 387614 action_freq [36. 14.] /e 0.087 /pscore 0 /loss 0.11120081 / Q_MAX 1.247263e+01
ts 387700 action_freq [64. 22.] /e 0.087 /pscore 1 /loss 0.072225966 / Q_MAX -7.273490e-01
ts 387750

ts 394769 action_freq [65. 21.] /e 0.087 /pscore 1 /loss 0.10685429 / Q_MAX -9.084268e-01
ts 394819 action_freq [22. 28.] /e 0.087 /pscore 0 /loss 0.10752365 / Q_MAX -1.350556e+00
ts 394869 action_freq [24. 26.] /e 0.087 /pscore 0 /loss 0.22824049 / Q_MAX -2.265428e+00
ts 394919 action_freq [21. 29.] /e 0.087 /pscore 0 /loss 0.2068913 / Q_MAX -2.427881e+00
ts 394969 action_freq [32. 18.] /e 0.087 /pscore 0 /loss 0.055755258 / Q_MAX -9.148835e-01
ts 395019 action_freq [25. 25.] /e 0.087 /pscore 0 /loss 0.07152011 / Q_MAX -1.622036e+00
ts 395105 action_freq [71. 15.] /e 0.087 /pscore 1 /loss 0.09335884 / Q_MAX -8.049500e-01
ts 395191 action_freq [76. 10.] /e 0.087 /pscore 1 /loss 0.030793445 / Q_MAX -5.887483e-01
ts 395318 action_freq [107.  20.] /e 0.087 /pscore 2 /loss 0.0793468 / Q_MAX 2.413710e+00
ts 395368 action_freq [28. 22.] /e 0.087 /pscore 0 /loss 0.07242374 / Q_MAX -3.631788e-01
ts 395418 action_freq [21. 29.] /e 0.087 /pscore 0 /loss 0.07131149 / Q_MAX -1.586931e+00
ts 395468

ts 402031 action_freq [44.  6.] /e 0.087 /pscore 0 /loss 0.08647846 / Q_MAX -1.341593e+00
ts 402155 action_freq [107.  17.] /e 0.087 /pscore 2 /loss 0.60982734 / Q_MAX -1.043593e-01
ts 402354 action_freq [174.  25.] /e 0.087 /pscore 4 /loss 0.6560703 / Q_MAX -1.527114e+00
ts 402404 action_freq [26. 24.] /e 0.087 /pscore 0 /loss 0.27448684 / Q_MAX -1.163507e+00
ts 402454 action_freq [30. 20.] /e 0.087 /pscore 0 /loss 0.043988947 / Q_MAX 2.911812e-01
ts 402504 action_freq [24. 26.] /e 0.087 /pscore 0 /loss 0.14574476 / Q_MAX -5.746925e-01
ts 402608 action_freq [85. 19.] /e 0.087 /pscore 2 /loss 0.051699482 / Q_MAX 3.588153e-01
ts 402694 action_freq [76. 10.] /e 0.087 /pscore 1 /loss 0.34588742 / Q_MAX -1.000422e+00
ts 402744 action_freq [42.  8.] /e 0.087 /pscore 0 /loss 0.38069725 / Q_MAX -7.606579e-01
ts 402794 action_freq [34. 16.] /e 0.087 /pscore 0 /loss 0.08307406 / Q_MAX -8.878996e-01
ts 402991 action_freq [178.  19.] /e 0.087 /pscore 4 /loss 0.060782537 / Q_MAX -5.235848e-01
ts 4

ts 410499 action_freq [33. 17.] /e 0.086 /pscore 0 /loss 0.09912288 / Q_MAX -7.254322e-01
ts 410549 action_freq [32. 18.] /e 0.086 /pscore 0 /loss 0.0763022 / Q_MAX -6.456885e-01
ts 410599 action_freq [32. 18.] /e 0.086 /pscore 0 /loss 0.14807366 / Q_MAX -5.528904e-01
ts 410649 action_freq [26. 24.] /e 0.086 /pscore 0 /loss 0.43904647 / Q_MAX -5.269762e-01
ts 410716 action_freq [49. 18.] /e 0.086 /pscore 1 /loss 0.100458086 / Q_MAX 6.786954e-01
ts 410814 action_freq [79. 19.] /e 0.086 /pscore 2 /loss 0.19558357 / Q_MAX 1.452225e+01
ts 410864 action_freq [24. 26.] /e 0.086 /pscore 0 /loss 0.05485761 / Q_MAX -6.305702e-01
ts 410914 action_freq [33. 17.] /e 0.086 /pscore 0 /loss 1.550734 / Q_MAX -9.019214e-01
ts 410964 action_freq [31. 19.] /e 0.086 /pscore 0 /loss 0.14123018 / Q_MAX -9.894736e-01
ts 411346 action_freq [329.  53.] /e 0.086 /pscore 9 /loss 0.07612567 / Q_MAX -1.155983e+00
ts 411442 action_freq [77. 19.] /e 0.086 /pscore 1 /loss 0.17727014 / Q_MAX 6.171635e-01
ts 411614 act

ts 418392 action_freq [43. 23.] /e 0.086 /pscore 1 /loss 0.11071348 / Q_MAX -3.826395e-01
ts 418590 action_freq [168.  30.] /e 0.086 /pscore 4 /loss 0.06391052 / Q_MAX -9.861993e-01
ts 418689 action_freq [77. 22.] /e 0.086 /pscore 2 /loss 0.37446535 / Q_MAX 4.252779e+00
ts 418739 action_freq [24. 26.] /e 0.086 /pscore 0 /loss 0.10629548 / Q_MAX -9.638565e-01
ts 418789 action_freq [21. 29.] /e 0.086 /pscore 0 /loss 0.16882645 / Q_MAX -7.615330e-01
ts 418949 action_freq [133.  27.] /e 0.086 /pscore 3 /loss 0.06094881 / Q_MAX -1.236901e+00
ts 419036 action_freq [75. 12.] /e 0.086 /pscore 1 /loss 0.08626261 / Q_MAX -1.312757e+00
ts 419122 action_freq [74. 12.] /e 0.086 /pscore 1 /loss 0.117001176 / Q_MAX -6.498666e-01
ts 419216 action_freq [85.  9.] /e 0.086 /pscore 1 /loss 0.24088019 / Q_MAX 2.563531e-01
ts 419614 action_freq [348.  50.] /e 0.086 /pscore 10 /loss 0.40177694 / Q_MAX -9.925562e-01
ts 419737 action_freq [96. 27.] /e 0.086 /pscore 2 /loss 0.24609788 / Q_MAX 1.087660e-01
ts 41

ts 426963 action_freq [171.  27.] /e 0.086 /pscore 4 /loss 0.14452626 / Q_MAX -1.921607e+00
ts 427013 action_freq [25. 25.] /e 0.086 /pscore 0 /loss 0.10222135 / Q_MAX -7.044449e-01
ts 427114 action_freq [89. 12.] /e 0.086 /pscore 2 /loss 0.31470838 / Q_MAX -3.147369e+00
ts 427164 action_freq [37. 13.] /e 0.086 /pscore 0 /loss 0.17493846 / Q_MAX -7.690720e-01
ts 427250 action_freq [74. 12.] /e 0.086 /pscore 1 /loss 0.025619796 / Q_MAX -1.418581e+00
ts 427304 action_freq [43. 11.] /e 0.086 /pscore 0 /loss 0.0744207 / Q_MAX 3.269960e+00
ts 427428 action_freq [92. 32.] /e 0.086 /pscore 2 /loss 0.33052173 / Q_MAX -9.001067e-01
ts 427681 action_freq [222.  31.] /e 0.086 /pscore 6 /loss 0.04670871 / Q_MAX 1.245168e+01
ts 427731 action_freq [43.  7.] /e 0.086 /pscore 0 /loss 0.45398912 / Q_MAX -1.683478e+00
ts 427797 action_freq [49. 17.] /e 0.086 /pscore 1 /loss 0.12878865 / Q_MAX 6.413845e-01
ts 427847 action_freq [24. 26.] /e 0.086 /pscore 0 /loss 0.10663069 / Q_MAX -7.364736e-01
ts 427933

ts 434616 action_freq [38. 12.] /e 0.086 /pscore 0 /loss 0.28161085 / Q_MAX -9.275786e-01
ts 434740 action_freq [99. 25.] /e 0.086 /pscore 2 /loss 0.23369893 / Q_MAX -1.030266e+00
ts 434790 action_freq [36. 14.] /e 0.086 /pscore 0 /loss 0.11925912 / Q_MAX -1.714334e+00
ts 434914 action_freq [88. 36.] /e 0.086 /pscore 2 /loss 0.69348294 / Q_MAX -7.028179e-01
ts 435005 action_freq [73. 18.] /e 0.086 /pscore 1 /loss 0.12629977 / Q_MAX -2.296281e-02
ts 435164 action_freq [141.  18.] /e 0.086 /pscore 3 /loss 0.76240426 / Q_MAX 1.320109e+00
ts 435301 action_freq [120.  17.] /e 0.086 /pscore 3 /loss 0.2732023 / Q_MAX -3.937975e-01
ts 435351 action_freq [39. 11.] /e 0.086 /pscore 0 /loss 0.27473965 / Q_MAX -3.698685e-01
ts 435511 action_freq [130.  30.] /e 0.086 /pscore 3 /loss 0.071028605 / Q_MAX -6.173196e-01
ts 435565 action_freq [38. 16.] /e 0.086 /pscore 0 /loss 0.3012314 / Q_MAX -9.002529e-01
ts 435615 action_freq [39. 11.] /e 0.086 /pscore 0 /loss 0.05465617 / Q_MAX -4.625324e-01
ts 435

ts 442100 action_freq [60. 10.] /e 0.085 /pscore 1 /loss 0.11294907 / Q_MAX 7.672054e+00
ts 442150 action_freq [41.  9.] /e 0.085 /pscore 0 /loss 0.6420782 / Q_MAX -1.105171e+00
ts 442200 action_freq [36. 14.] /e 0.085 /pscore 0 /loss 0.050352827 / Q_MAX -9.777473e-01
ts 442250 action_freq [41.  9.] /e 0.085 /pscore 0 /loss 0.11044721 / Q_MAX -9.077282e-01
ts 442300 action_freq [37. 13.] /e 0.085 /pscore 0 /loss 0.055322543 / Q_MAX -6.979119e-01
ts 442350 action_freq [42.  8.] /e 0.085 /pscore 0 /loss 0.031830605 / Q_MAX -7.116721e-01
ts 442400 action_freq [36. 14.] /e 0.085 /pscore 0 /loss 0.08436702 / Q_MAX -7.979437e-01
ts 442460 action_freq [43. 17.] /e 0.085 /pscore 0 /loss 0.37309316 / Q_MAX -1.255427e+00
ts 442510 action_freq [26. 24.] /e 0.085 /pscore 0 /loss 0.18605383 / Q_MAX -6.690053e-01
ts 442560 action_freq [28. 22.] /e 0.085 /pscore 0 /loss 0.7876338 / Q_MAX -6.176435e-01
ts 442626 action_freq [47. 19.] /e 0.085 /pscore 1 /loss 0.07830317 / Q_MAX 5.698147e-01
ts 442712 a

ts 449583 action_freq [82. 22.] /e 0.085 /pscore 2 /loss 0.5322749 / Q_MAX 7.417735e+00
ts 449742 action_freq [133.  26.] /e 0.085 /pscore 3 /loss 0.21690026 / Q_MAX -1.035913e+00
ts 449792 action_freq [43.  7.] /e 0.085 /pscore 0 /loss 0.080841556 / Q_MAX -9.836123e-01
ts 449842 action_freq [22. 28.] /e 0.085 /pscore 0 /loss 0.05009351 / Q_MAX -1.167781e+00
ts 449975 action_freq [116.  17.] /e 0.085 /pscore 2 /loss 0.09046871 / Q_MAX -5.674681e-02
ts 450038 action_freq [48. 15.] /e 0.085 /pscore 1 /loss 0.05636124 / Q_MAX -1.795353e+00
ts 450204 action_freq [145.  21.] /e 0.085 /pscore 3 /loss 0.46739197 / Q_MAX -4.481440e-01
ts 450254 action_freq [33. 17.] /e 0.085 /pscore 0 /loss 0.08925899 / Q_MAX -8.543571e-01
ts 450304 action_freq [35. 15.] /e 0.085 /pscore 0 /loss 0.6158324 / Q_MAX -1.037276e+00
ts 450354 action_freq [39. 11.] /e 0.085 /pscore 0 /loss 0.16822532 / Q_MAX -6.204215e-01
ts 450440 action_freq [62. 24.] /e 0.085 /pscore 1 /loss 0.23086618 / Q_MAX -8.216481e-01
ts 450

ts 457986 action_freq [44. 13.] /e 0.085 /pscore 0 /loss 0.056180187 / Q_MAX -1.607243e+00
ts 458146 action_freq [123.  37.] /e 0.085 /pscore 3 /loss 0.3220033 / Q_MAX -1.841399e+00
ts 458232 action_freq [74. 12.] /e 0.085 /pscore 1 /loss 0.18627195 / Q_MAX -1.667225e+00
ts 458282 action_freq [40. 10.] /e 0.085 /pscore 0 /loss 0.11261902 / Q_MAX -1.580145e+00
ts 458406 action_freq [103.  21.] /e 0.085 /pscore 2 /loss 0.23848096 / Q_MAX -1.084284e+00
ts 458462 action_freq [37. 19.] /e 0.085 /pscore 0 /loss 0.34911606 / Q_MAX 8.303233e-01
ts 458600 action_freq [106.  32.] /e 0.085 /pscore 3 /loss 0.21179478 / Q_MAX -1.981890e+00
ts 458723 action_freq [105.  18.] /e 0.085 /pscore 2 /loss 0.04238324 / Q_MAX 3.150289e+00
ts 458848 action_freq [104.  21.] /e 0.085 /pscore 2 /loss 0.12745029 / Q_MAX -6.075767e-01
ts 458898 action_freq [36. 14.] /e 0.085 /pscore 0 /loss 0.045985337 / Q_MAX -6.120207e-01
ts 459065 action_freq [139.  28.] /e 0.085 /pscore 3 /loss 0.41621256 / Q_MAX -8.748946e-01

ts 465937 action_freq [100.  23.] /e 0.085 /pscore 2 /loss 0.02339465 / Q_MAX -1.151352e+00
ts 466029 action_freq [80. 12.] /e 0.085 /pscore 1 /loss 0.19312282 / Q_MAX -2.050653e+00
ts 466098 action_freq [53. 16.] /e 0.085 /pscore 1 /loss 0.6849136 / Q_MAX 5.198404e+00
ts 466370 action_freq [229.  43.] /e 0.085 /pscore 6 /loss 0.29936564 / Q_MAX -7.920926e-01
ts 466467 action_freq [82. 15.] /e 0.084 /pscore 1 /loss 0.18689504 / Q_MAX -2.610135e-01
ts 466518 action_freq [44.  7.] /e 0.084 /pscore 0 /loss 0.18641289 / Q_MAX 1.504015e+01
ts 466568 action_freq [42.  8.] /e 0.084 /pscore 0 /loss 0.4559425 / Q_MAX -7.810421e-01
ts 466618 action_freq [42.  8.] /e 0.084 /pscore 0 /loss 0.14425363 / Q_MAX -1.063175e+00
ts 466750 action_freq [116.  16.] /e 0.084 /pscore 2 /loss 0.05718545 / Q_MAX 2.280982e-01
ts 466948 action_freq [168.  30.] /e 0.084 /pscore 4 /loss 0.19969898 / Q_MAX -3.327788e-01
ts 467034 action_freq [69. 17.] /e 0.084 /pscore 1 /loss 0.103377536 / Q_MAX -8.387922e-01
ts 467

ts 473142 action_freq [59. 10.] /e 0.084 /pscore 1 /loss 0.16209868 / Q_MAX 1.220354e+01
ts 473271 action_freq [110.  19.] /e 0.084 /pscore 2 /loss 0.10082195 / Q_MAX 1.533747e+00
ts 473321 action_freq [34. 16.] /e 0.084 /pscore 0 /loss 0.38440922 / Q_MAX -3.451166e-01
ts 473373 action_freq [36. 16.] /e 0.084 /pscore 0 /loss 0.36642575 / Q_MAX 1.287201e+01
ts 473475 action_freq [80. 22.] /e 0.084 /pscore 2 /loss 0.11890688 / Q_MAX -5.687832e-01
ts 473525 action_freq [28. 22.] /e 0.084 /pscore 0 /loss 1.0843494 / Q_MAX -1.378279e+00
ts 473575 action_freq [16. 34.] /e 0.084 /pscore 0 /loss 0.22909354 / Q_MAX -7.140990e-01
ts 473625 action_freq [28. 22.] /e 0.084 /pscore 0 /loss 0.15530607 / Q_MAX -1.140434e+00
ts 473675 action_freq [36. 14.] /e 0.084 /pscore 0 /loss 0.0566257 / Q_MAX -9.860064e-01
ts 473725 action_freq [35. 15.] /e 0.084 /pscore 0 /loss 0.31277496 / Q_MAX -9.301603e-01
ts 473778 action_freq [48.  5.] /e 0.084 /pscore 0 /loss 0.43622607 / Q_MAX -1.089296e+00
ts 473828 act

In [None]:
Jai Gurudev