# Matrix Capsule Network for RL

In [1]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import cv2
import sys
import pong_fun as game # whichever is imported "as game" will be used
import random
import time 
import numpy as np
from collections import deque

  from ._conv import register_converters as _register_converters


pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html


# Parameters for the MatrixNet

In [2]:
# EM-routing hyper-parameters.
iter_routing = 2  # number of EM routing iterations run by em_routing()
# Lambda (inverse temperature) of the activation function a_c at iteration 0.
# This must be a plain float: a trailing comma after the literal would turn it
# into a one-element tuple.
ac_lambda0 = 0.01
# The paper describes lambda as increasing at each iteration on a fixed schedule
# but gives no concrete values; this is the intended per-iteration increment.
# NOTE(review): nothing in this notebook reads ac_lambda_step yet.
ac_lambda_step = 0.01
epsilon = 1e-9  # small constant added to denominators / variances in em_routing()

################################
A = 32  # number of channels in output from ReLU Conv1
B = 8  # number of capsules in output from PrimaryCaps
C = 16  # number of channels in output from ConvCaps1
D = 16  # number of channels in output from ConvCaps2
is_train = True  # passed as `trainable` to the slim conv layers
num_classes = 10  # number of capsules in the class-capsule layer
train_freq = 10  # a gradient step is taken every `train_freq` timesteps

# Parameters for the Agent

In [3]:
# --- Q-learning agent settings ---
ACTIONS = 6              # length of the one-hot action vector
GAMMA = 0.99             # discount factor applied to the next-state Q value
REPLAY_MEMORY = 50000    # maximum number of transitions kept for replay
OBSERVE = 500.0          # timesteps of purely random play before training starts
EXPLORE = 5000.0         # timesteps over which epsilon is annealed
INITIAL_EPSILON = 1.0    # exploration rate at the start of annealing
FINAL_EPSILON = 0.05     # exploration rate once annealing has finished
step-85 loss= 2.2954699993133545, train_accuracy= 10.0, test_acc= 13.333333969116211
step-90 loss= 2.2949106693267822, train_accuracy= 12.0, test_acc= 6.6666669845581055
step-95 loss= 2.2778632640838623, train_accuracy= 10.0, test_acc= 20.0
step-100 loss= 2.258962631225586, train_accuracy= 16.0, test_acc= 13.333333969116211
step-105 loss= 2.262554883956909, train_accuracy= 22.0, test_acc= 20.0
step-110 loss= 2.2702789306640625, train_accuracy= 22.0, test_acc= 26.666667938232422
# Minibatch size sampled from replay memory; `batch_size` is a lowercase alias
# for the same value.
batch_size = BATCH = 32

# Function for the MatrixNet

In [4]:
def kernel_tile(input, kernel, stride):
    """Collect every kernel x kernel window of `input` along a new axis.

    A one-hot depthwise filter copies each position of the window into its own
    output channel, which is equivalent to extracting image patches with
    'VALID' padding.

    Args:
        input: 4-D tensor; its last (channel) dimension must be statically known.
        kernel: side length of the square window.
        stride: step between neighbouring windows along height and width.

    Returns:
        Tensor of shape [batch, out_height, out_width, kernel * kernel, channels].
    """
    in_shape = input.get_shape()
    channels = in_shape[3]
    window = kernel * kernel

    # selector[row, col, :, k] == 1 exactly when k is the flat index of (row, col),
    # so depthwise output channel k of each input channel holds that window cell.
    selector = np.zeros(shape=[kernel, kernel, channels, window], dtype=np.float32)
    for flat_index in range(window):
        row, col = divmod(flat_index, kernel)
        selector[row, col, :, flat_index] = 1.0

    selector_op = tf.constant(selector, dtype=tf.float32)
    patches = tf.nn.depthwise_conv2d(
        input, selector_op, strides=[1, stride, stride, 1], padding='VALID')

    out_shape = patches.get_shape()
    # Depthwise output channels are ordered (input channel, window cell); split
    # them apart, then move the window-cell axis in front of the channel axis.
    patches = tf.reshape(
        patches,
        shape=[-1, int(out_shape[1]), int(out_shape[2]), int(channels), window])
    return tf.transpose(patches, perm=[0, 1, 2, 4, 3])

# input should be a tensor with size as [batch_size, caps_num_i, 16]
def mat_transform(input, caps_num_c, regularizer, bs):
    """Turn input capsule poses into votes for every output capsule.

    Each 16-value pose is viewed as a 4x4 matrix and multiplied by a learned
    4x4 matrix per (input capsule, output capsule) pair.

    Args:
        input: tensor reshapeable to [-1, caps_num_i, 4, 4]; dimension 1 must be
            statically known.
        caps_num_c: number of output capsules.
        regularizer: regularizer attached to the weight variable 'w'.
        bs: size of the leading dimension at run time; the weights are tiled
            this many times so the matmul operands have matching batch shapes.

    Returns:
        Votes tensor of shape [-1, caps_num_i, caps_num_c, 16].
    """
    caps_num_i = int(input.get_shape()[1])
    poses = tf.reshape(input, shape=[-1, caps_num_i, 1, 4, 4])

    # The capsule output (mean of a Gaussian, plus an activation) does not depend
    # on the absolute scale of the weights or votes; a larger stddev is used for
    # numerical stability.
    weights = slim.variable(
        'w',
        shape=[1, caps_num_i, caps_num_c, 4, 4],
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
        regularizer=regularizer,
    )

    with tf.variable_scope('tile___1'):
        # Replicate the shared weights along the leading dimension.
        weights = tf.tile(weights, [bs, 1, 1, 1, 1])
    with tf.variable_scope('tile___2'):
        # Replicate every input pose once per output capsule.
        poses = tf.tile(poses, [1, 1, caps_num_c, 1, 1])
    with tf.variable_scope('tile___3'):
        products = tf.matmul(poses, weights)
        return tf.reshape(products, [-1, caps_num_i, caps_num_c, 16])


In [5]:
def em_routing(votes, activation, caps_num_c, regularizer, r_in):
    """Run `iter_routing` iterations of EM routing between two capsule layers.

    Args:
        votes: tensor of shape [N, caps_num_i, caps_num_c, n_channels]; the
            last dimension must be statically known.
        activation: input-capsule activations; dimension 1 (caps_num_i) must be
            statically known. Callers in this notebook pass [N, caps_num_i, 1].
        caps_num_c: number of output capsules.
        regularizer: regularizer attached to the 'beta_v' and 'beta_a' variables.
        r_in: initial routing assignments used for the first iteration
            (callers feed a uniform 1 / caps_num_c array of shape
            [N, caps_num_i, caps_num_c]).

    Returns:
        (miu, activation_out, test):
            miu: weighted vote means, shape [N, 1, caps_num_c, n_channels].
            activation_out: output of the last iteration's softmax (shape
                [N, caps_num_c] when iter_routing >= 1).
            test: always an empty list; kept so the return arity is unchanged.

    Reads the module-level `iter_routing`, `ac_lambda0` and `epsilon`.
    """
    test = []  # never populated; retained only for the 3-tuple return interface

    caps_num_i = int(activation.get_shape()[1])
    n_channels = int(votes.get_shape()[-1])

    sigma_square = []
    miu = []
    activation_out = []
    beta_v = slim.variable('beta_v', shape=[caps_num_c, n_channels], dtype=tf.float32,
                           initializer=tf.constant_initializer(0.0),
                           regularizer=regularizer)
    beta_a = slim.variable('beta_a', shape=[caps_num_c], dtype=tf.float32,
                           initializer=tf.constant_initializer(0.0),
                           regularizer=regularizer)

    # Gradients flow through both votes and activations (no stop_gradient).
    votes_in = votes
    activation_in = activation

    for iters in range(iter_routing):
        # e-step
        if iters == 0:
            # First pass: use the externally supplied assignments.
            r = r_in
        else:
            # Contributor: Yunzhi Shi
            # log and exp here provide higher numerical stability especially for bigger number of iterations
            log_p_c_h = -tf.log(tf.sqrt(sigma_square)) - \
                        (tf.square(votes_in - miu) / (2 * sigma_square))
            # Shift by the per-input-capsule maximum (minus log 10) before
            # exponentiating. `keepdims` matches every other reduction in this
            # function; the old `keep_dims` spelling is deprecated.
            log_p_c_h = log_p_c_h - \
                        (tf.reduce_max(log_p_c_h, axis=[2, 3], keepdims=True) - tf.log(10.0))
            p_c = tf.exp(tf.reduce_sum(log_p_c_h, axis=3))

            ap = p_c * tf.reshape(activation_out, shape=[-1, 1, caps_num_c])

            r = ap / (tf.reduce_sum(ap, axis=2, keepdims=True) + epsilon)

        # m-step
        # Weight assignments by the input activations, then renormalise over
        # the output capsules (axis 2).
        r = r * activation_in
        r = r / (tf.reduce_sum(r, axis=2, keepdims=True)+epsilon)

        # r1: assignments normalised over the input capsules (axis 1), used as
        # weights for the mean and variance of the votes.
        r_sum = tf.reduce_sum(r, axis=1, keepdims=True)
        r1 = tf.reshape(r / (r_sum + epsilon),
                        shape=[-1 , caps_num_i, caps_num_c, 1])
        miu = tf.reduce_sum(votes_in * r1, axis=1, keepdims=True)
        sigma_square = tf.reduce_sum(tf.square(votes_in - miu) * r1,
                                     axis=1, keepdims=True) + epsilon

        if iters == iter_routing-1:
            # Final iteration: activation from the cost of each output capsule.
            r_sum = tf.reshape(r_sum, [-1, caps_num_c, 1])
            cost_h = (beta_v + tf.log(tf.sqrt(tf.reshape(sigma_square,
                                                         shape=[-1, caps_num_c, n_channels])))) * r_sum
            activation_out = tf.nn.softmax(ac_lambda0 * (beta_a - tf.reduce_sum(cost_h, axis=2)))
        else:
            # Intermediate iterations: softmax of the summed assignments.
            activation_out = tf.nn.softmax(r_sum)

    return miu, activation_out, test

In [6]:
def get_coord_add(dataset_name: str):
    """Return the scaled coordinate-addition grid for a dataset.

    Args:
        dataset_name: 'mnist' or 'smallNORB'.

    Returns:
        float32 array of shape (n, n, 2) holding [x, y] pairs divided by the
        dataset's scale (n = 3 and scale 28 for 'mnist', n = 4 and scale 32 for
        'smallNORB'). x varies along the second axis, y along the first.

    Raises:
        KeyError: if `dataset_name` is not one of the supported names.
    """
    # TODO: get coord add for cifar10/100 datasets (32x32x3)
    def square_grid(ticks):
        # Row index selects y, column index selects x.
        return [[[x, y] for x in ticks] for y in ticks]

    presets = {
        'mnist': (square_grid([8., 12., 16.]), 28.),
        'smallNORB': (square_grid([8., 12., 16., 24.]), 32.),
    }
    centres, extent = presets[dataset_name]
    return np.array(centres, dtype=np.float32) / extent

# Agent for the MatCap DQN

In [7]:
def createNetwork():
    """Build the matrix-capsule Q-network graph and return its feed/fetch tensors.

    Layers, by variable scope: relu_conv1 -> primary_caps -> conv_caps1 ->
    conv_caps2 -> class_caps -> output_layer (a linear layer with ACTIONS
    outputs). The initial routing assignments and the class-capsule coordinate
    addition are placeholders, so the caller must feed arrays whose leading
    dimension matches the fed batch size `bs`.

    Returns:
        Tuple (s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps,
        coord_add_op_class_caps): the 80x80x4 state placeholder, the Q-value
        output, and the five auxiliary placeholders.
    """
    # -------------------------------------------------------------------------------------
    # Input frames and the scalar batch size used to tile the transform weights.
    s = tf.placeholder(tf.float32, shape=(None, 80, 80, 4), name='X')
    bs = tf.placeholder(tf.int32, shape=(), name='bs')
    #bs = batch_size
    # Initial routing assignments, one placeholder per routed layer.
    r_conv_caps1= tf.placeholder(tf.float32,[None, 72, C], name='r_conv_caps1') # 5*5*batch_size
    #r_conv_caps1= tf.placeholder(tf.float32,[5*5*batch_size, 72, C], name='r_conv_caps1') # 5*5*batch_size

    r_conv_caps2 = tf.placeholder(tf.float32,[None, 144, D], name='r_conv_caps2') # 3*3*batch_size
    #r_conv_caps2 = tf.placeholder(tf.float32,[3*3*batch_size, 144, D], name='r_conv_caps2') # 3*3*batch_size

    r_class_caps = tf.placeholder(tf.float32,[None, 16, num_classes], name='r_class_caps') # 3*3*batch_size
    #r_class_caps = tf.placeholder(tf.float32,[3*3*batch_size, 16, num_classes], name='r_class_caps') # 3*3*batch_size

    #coord_add_op_class_caps  = tf.placeholder(tf.float32,[3*3*batch_size, 16, num_classes, 2], name='coord_add_op_class_caps')
                                                                     # 3*3*batch_size
    # Coordinate addition concatenated onto the class-capsule votes.
    coord_add_op_class_caps  = tf.placeholder(tf.float32,[None, 16, num_classes, 2], name='coord_add_op_class_caps')
    # 3*3*batch_size
    # -------------------------------------------------------------------------------------
    # data_size tracks the spatial side length by hand as the layers shrink it (starts at 80).
    data_size = int(s.get_shape()[1])
    # NOTE: `initializer` is currently unused -- the `weights_initializer=initializer`
    # argument below is commented out, so slim.conv2d falls back to its own default.
    initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
    # instead of initializing bias with constant 0, 
    # a truncated normal initializer is exploited here for higher stability 
    bias_initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)  # tf.constant_initializer(0.0)
    # The paper did not mention any regularization; a common l2 regularizer on the weights is added here
    weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)
    # weights_initializer=initializer,
    with slim.arg_scope([slim.conv2d], trainable=is_train, biases_initializer=bias_initializer, weights_regularizer=weights_regularizer):
        with tf.variable_scope('relu_conv1'):
            output = slim.conv2d(s, num_outputs=A, kernel_size=[10, 10], stride=6, padding='VALID', scope='relu_conv1', activation_fn=tf.nn.relu)
            # VALID conv output size: floor((80 - 10) / 6) + 1 = 12
            data_size = int(np.floor((data_size - 10) / 6)) + 1
            #print(output.get_shape())
            #print(data_size)
            #assert output.get_shape() == [batch_size, data_size, data_size, 32]
            votes__1 = output  # not used after this point
        with tf.variable_scope('primary_caps'):
            # Two 1x1 convolutions: B 16-value poses and B sigmoid activations per position.
            pose = slim.conv2d(output, num_outputs=B * 16,kernel_size=[1, 1], stride=1, padding='VALID', scope='primary_caps', activation_fn=None)
            activation = slim.conv2d(output, num_outputs=B, kernel_size=[
                                     1, 1], stride=1, padding='VALID', scope='primary_caps/activation', activation_fn=tf.nn.sigmoid)
            pose = tf.reshape(pose, shape=[-1, data_size, data_size, B, 16]) # (50, 12, 12, 8, 16)

            #print(pose.get_shape())
            activation = tf.reshape(activation, shape=[-1, data_size, data_size, B, 1]) # (50, 12, 12, 8, 1)
            #print(activation.get_shape())
            # Pack pose (16) and activation (1) into 17 values per capsule.
            output = tf.concat([pose, activation], axis=4)
            output = tf.reshape(output, shape=[-1, data_size, data_size, B * 17]) # (50, 12, 12, 136)
            #print(output.get_shape())

            #assert output.get_shape() == [batch_size, data_size, data_size, B * 17]
        with tf.variable_scope('conv_caps1') as scope:
            output = kernel_tile(output, 3, 2)
            # NOTE(review): gives 5 for data_size == 12, which equals the VALID
            # 3x3 / stride-2 output size floor((12 - 3) / 2) + 1; the two formulas
            # do not agree for every input size (e.g. 11), so verify if the input changes.
            data_size = int(np.floor((data_size - 2) / 2))
            #print(data_size) # 5 
            output = tf.reshape(output, shape=[-1, 3 * 3 * B, 17]) 
            # batch_size * data_size * data_size  (1250, 72, 17) 
            #print("1",output.get_shape())
            # Channel 16 of each capsule is its activation; channels 0..15 are the pose.
            activation = tf.reshape(output[:, :, 16], shape=[-1, 3 * 3 * B, 1])
            #print("output shape ---------------",output.get_shape())
            #print("activation shape----------------------",activation.get_shape()) #  (1250, 72, 1)

            with tf.variable_scope('v') as scope:
                votes = mat_transform(output[:, :, :16], C, weights_regularizer, bs = bs*data_size*data_size)
                #bs*data_size*data_size)

                #print(votes.get_shape(),"votes shape")
            with tf.variable_scope('routing') as scope:
                caps_num_i = int(activation.get_shape()[1])  # not used after this point

                miu, activation, _ = em_routing(votes, activation, C, weights_regularizer,r_conv_caps1)
                # miu, activation, _ = em_routing(votes, activation, C, weights_regularizer)
                #print("activation",activation.get_shape())
            pose = tf.reshape(miu, shape=[-1, data_size, data_size, C, 16])
            #print("3",pose.get_shape()) # 50, 5, 5, 16, 16)
            activation = tf.reshape(activation, shape=[-1, data_size, data_size, C, 1])
            #print("activation",activation.get_shape())
            # Flattened channel count after concatenating pose and activation: C*1 + C*16.
            cat_size =  activation.get_shape()[3]*activation.get_shape()[4] + pose.get_shape()[3] *pose.get_shape()[4]
            #print(cat_size)
            output = tf.reshape(tf.concat([pose, activation], axis=4),[-1, data_size, data_size, cat_size])
            #print("5",output.get_shape()) # (50, 5, 5, 272)


        with tf.variable_scope('conv_caps2') as scope:
            output = kernel_tile(output, 3, 1)

            # 5 -> 3 for a 3x3 window with stride 1.
            data_size = int(np.floor((data_size - 2) / 1))

            output = tf.reshape(output, shape=[-1, 3 * 3 * C, 17]) # batch_size * data_size * data_size
            #print("canv_caps2",output.get_shape(), data_size)
            activation = tf.reshape(output[:, :, 16], shape=[-1 , 3 * 3 * C, 1]) # batch_size * data_size * data_size
            #print("canv_caps2_activation",activation.get_shape(), data_size)

            with tf.variable_scope('v') as scope:
                votes = mat_transform(output[:, :, :16], D, weights_regularizer,bs = bs*data_size*data_size)
                #print(votes.get_shape(),"votes shape")

            with tf.variable_scope('routing') as scope:
                caps_num_i = int(activation.get_shape()[1])  # not used after this point
                #print(caps_num_i,"for 1")
                miu, activation, _ = em_routing(votes, activation, D, weights_regularizer, r_conv_caps2)

            pose = tf.reshape(miu, shape=[-1, D, 16]) # batch_size * data_size * data_size
            #print("4",pose.get_shape())
            #tf.logging.info('conv cap 2 pose shape: {}'.format(votes.get_shape()))
            activation = tf.reshape(activation, shape=[-1, D, 1]) # batch_size * data_size * data_size
            #print("4 ---activation",activation.get_shape())

        with tf.variable_scope('class_caps') as scope:
            with tf.variable_scope('v') as scope:
                votes = mat_transform(pose, num_classes, weights_regularizer,bs = bs*data_size*data_size)
                #print(votes.get_shape(),"votes.getshape")
                #assert votes.get_shape()[1:] == [D, num_classes, 16]
                #tf.logging.info('class cap votes original shape: {}'.format(votes.get_shape()))
                # The string literal below is disabled code that built the coordinate
                # addition as a constant; the placeholder fed by the caller replaces it.
                '''coord_add = get_coord_add('mnist') 
                coord_add = np.reshape(coord_add, newshape=[data_size * data_size, 1, 1, 2])
                coord_add = np.tile(coord_add, [bs, D, num_classes, 1])
                coord_add_op = tf.constant(coord_add, dtype=tf.float32)
                print("___coord_add______",coord_add_op.shape)'''

                # Prepend the 2 coordinate values to the 16 vote values (18 per vote).
                votes = tf.concat([coord_add_op_class_caps, votes], axis=3)
                #tf.logging.info('class cap votes coord add shape: {}'.format(votes.get_shape()))
                #print(votes.get_shape(),"coorr vote shape after  jnbfv")
            with tf.variable_scope('routing') as scope:
                caps_num_i = int(activation.get_shape()[1])  # not used after this point
                #print("_____",caps_num_i)
                miu, activation, test2 = em_routing(votes, activation, num_classes, weights_regularizer,r_class_caps)

            output = tf.reshape(activation, shape=[-1, data_size, data_size, num_classes]) #batch_size
            #print("d op",output.get_shape())
        # Average the class activations over the data_size x data_size grid.
        # NOTE: this pooled `output` is not used below; the readout is built from the pooled poses.
        output = tf.reshape(tf.nn.avg_pool(output, ksize=[1, data_size, data_size, 1], strides=[
                    1, 1, 1, 1], padding='VALID'), shape=[-1, num_classes]) # batch_size
        #print("miu  2",miu.get_shape())
        # Average the class-capsule means over the same spatial grid.
        pose = tf.nn.avg_pool(tf.reshape(miu, shape=[-1, data_size, data_size,miu.get_shape()[2]*miu.get_shape()[3]
                                    ]), ksize=[1, data_size, data_size, 1], strides=[1, 1, 1, 1], padding='VALID')
        #print("output_size_posssss",pose.get_shape())
        #miu  2 (450, 1, 10, 18)
        #output_size_posssss (50, 1, 1, 180)
        pose_out = tf.reshape(pose, shape=[-1, num_classes, 18])
        #print("output_size",pose_out.get_shape())
        # Flatten to one feature vector per frame for the linear readout.
        vector_j = tf.reshape(pose_out, shape= [-1, num_classes * 18])
        #print("pose_out",pose_out.get_shape())
        #print("vector_j",vector_j.get_shape())
    with tf.variable_scope('output_layer') as scope:
        # Linear layer producing one Q value per action.
        readout = tf.contrib.layers.fully_connected(vector_j, num_outputs=ACTIONS, activation_fn=None) 
    print("Network has been created successfully")
    return s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps

# Environment for Training of the Agent

In [8]:
def _initial_routing_feeds(n_frames):
    """Build the constant routing inputs for a batch of `n_frames` states.

    Returns (coord_add, r_conv_caps1, r_conv_caps2, r_class_caps): the tiled
    'mnist' coordinate grid and uniform initial routing assignments, with
    leading dimensions 3*3*n_frames, 5*5*n_frames, 3*3*n_frames and
    3*3*n_frames respectively (5 and 3 are the spatial sizes produced by
    createNetwork for 80x80 inputs).
    """
    coord_add = get_coord_add('mnist')
    coord_add = np.reshape(coord_add, newshape=[3 * 3, 1, 1, 2])  # data_size = 3
    coord_add = np.tile(coord_add, [n_frames, D, num_classes, 1])
    r_caps1 = np.ones([5 * 5 * n_frames, 72, C]) / C
    r_caps2 = np.ones([3 * 3 * n_frames, 144, D]) / D
    r_class = np.ones([3 * 3 * n_frames, 16, num_classes]) / num_classes
    return coord_add, r_caps1, r_caps2, r_class


def trainNetwork(sess, s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps):
    """Train the Q-network on the game with epsilon-greedy Q-learning and replay.

    Args:
        sess: TensorFlow session used to initialise variables. `readout.eval`
            and `train_step.run` rely on it also being the default session.
        s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps,
        coord_add_op_class_caps: tensors returned by createNetwork().

    The loop acts randomly for the first OBSERVE timesteps, then anneals
    epsilon from INITIAL_EPSILON to FINAL_EPSILON over EXPLORE timesteps and
    takes one Adam step on a BATCH-sized replay sample every `train_freq`
    timesteps. It returns once bar1_score leads bar2_score by more than 18.
    """
    tick = time.time()

    # Cost: squared error between the target y and the Q value of the taken action.
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), axis=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cost)

    # open up a game state to communicate with emulator
    game_state = game.GameState()
    # Bounded replay memory: once full, appending drops the oldest transition.
    replay_memory = deque(maxlen=REPLAY_MEMORY)

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, _, terminal, bar1_score, bar2_score = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    _, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # Initialise after the optimizer exists so its slot variables are included.
    sess.run(tf.global_variables_initializer())

    def routing_feed(n_frames):
        # Everything the network needs besides the states, for a given batch size.
        coord_add, r_caps1, r_caps2, r_class = _initial_routing_feeds(n_frames)
        return {r_conv_caps1: r_caps1,
                r_conv_caps2: r_caps2,
                r_class_caps: r_class,
                coord_add_op_class_caps: coord_add,
                bs: np.int32(n_frames)}

    def with_states(base_feed, states):
        # Feed-dict keys are tensors, so copy-and-assign rather than dict(**kwargs).
        feed = dict(base_feed)
        feed[s] = states
        return feed

    predict_feed = routing_feed(1)          # single state, for action selection
    train_feed = routing_feed(batch_size)   # minibatch, for targets and updates

    explore_eps = INITIAL_EPSILON
    t = 0
    episode = 0
    while True:
        # choose an action epsilon greedily
        readout_t = readout.eval(feed_dict=with_states(predict_feed, s_t.reshape((1, 80, 80, 4))))

        a_t = np.zeros([ACTIONS])
        if random.random() <= explore_eps or t <= OBSERVE:
            action_index = random.randrange(ACTIONS)
        else:
            action_index = np.argmax(readout_t)
        a_t[action_index] = 1

        # scale down epsilon
        if explore_eps > FINAL_EPSILON and t > OBSERVE:
            explore_eps -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe next state and reward
        x_t1_col, r_t, terminal, bar1_score, bar2_score = game_state.frame_step(a_t)
        if(terminal == 1):
            episode +=1
        x_t1 = cv2.cvtColor(cv2.resize(x_t1_col, (80, 80)), cv2.COLOR_BGR2GRAY)
        _, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
        x_t1 = np.reshape(x_t1, (80, 80, 1))
        # Newest frame goes first; the oldest of the four frames is dropped.
        s_t1 = np.append(x_t1, s_t[:, :, 0:3], axis=2)

        # store the transition in replay memory
        replay_memory.append((s_t, a_t, r_t, s_t1, terminal))

        # only train if done observing
        if t > OBSERVE and t % train_freq == 0:
            # sample a minibatch to train on
            minibatch = random.sample(replay_memory, BATCH)

            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            readout_j1_batch = readout.eval(feed_dict=with_states(train_feed, s_j1_batch))

            # Terminal transitions use the reward alone; otherwise bootstrap
            # from the best next-state Q value.
            y_batch = [
                d[2] if d[4] else d[2] + GAMMA * np.max(q_next)
                for d, q_next in zip(minibatch, readout_j1_batch)
            ]

            # perform gradient step
            step_feed = with_states(train_feed, s_j_batch)
            step_feed[y] = y_batch
            step_feed[a] = a_batch
            train_step.run(feed_dict=step_feed)

        # update the old values
        s_t = s_t1
        t += 1

        # NOTE: checkpointing (tf.train.Saver) and per-step file logging are
        # not enabled in this notebook.
        if r_t!= 0:
            print ("TIMESTEP", t, "/ e", episode, "/ bar1_score", bar1_score, "/ bar2_score", bar2_score, "/ REWARD", r_t, "/ Q_MAX %e" % np.max(readout_t))

        # Stop once bar1 leads bar2 by more than 18 points.
        if (bar1_score - bar2_score) > 18:
            print("Game_Ends_in Time:",int(time.time() - tick))
            break

In [9]:
def playGame():
    """Build a fresh graph and session, then train the agent until the game ends.

    The default graph is reset first so the cell can be re-run without
    variable-name clashes. The InteractiveSession installs itself as the
    default session (which trainNetwork's `.eval()` / `.run()` calls rely on)
    and is always closed afterwards, even if training raises.
    """
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    try:
        s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps = createNetwork()
        trainNetwork(sess, s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps)
    finally:
        # Release the session's resources; nothing outside this function holds it.
        sess.close()

In [None]:
def main():
    """Program entry point; simply delegates to ``playGame``."""
    playGame()

if __name__ == "__main__":
    # `tick` has to remain a module-level name: the training loop reads it
    # when it prints the elapsed time at the end of the game.
    tick = time.time()
    main()
    # Whole seconds elapsed since `tick` was taken.
    _elapsed_seconds = int(time.time() - tick)
    print("Game_Ends_in Time:", _elapsed_seconds)
    print("____________ END HERE _____________")

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Network has been created successfully
TIMESTEP 129 / e 0 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 175 / e 0 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 221 / e 0 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 267 / e 0 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 313 / e 0 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 359 / e 0 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 405 / e 0 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 451 / e 0 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 497 / e 0 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 543 / e 0 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 589 / e 0 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MA

TIMESTEP 5288 / e 4 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -8.903151e-03
TIMESTEP 5334 / e 4 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -9.048581e-03
TIMESTEP 5380 / e 4 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -9.143887e-03
TIMESTEP 5426 / e 4 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -9.218414e-03
TIMESTEP 5472 / e 4 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -9.403991e-03
TIMESTEP 5518 / e 4 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -9.519003e-03
TIMESTEP 5564 / e 4 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -9.774879e-03
TIMESTEP 5610 / e 4 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -9.902191e-03
TIMESTEP 5656 / e 4 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -1.003198e-02
TIMESTEP 5702 / e 5 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -1.010540e-02
TIMESTEP 5748 / e 5 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.016859e-02
TIMESTEP 5794 / e 5 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_M

TIMESTEP 9750 / e 9 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -2.151313e-02
TIMESTEP 9796 / e 9 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -2.166249e-02
TIMESTEP 9842 / e 9 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -2.175018e-02
TIMESTEP 9888 / e 9 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -2.188418e-02
TIMESTEP 9934 / e 9 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -2.201766e-02
TIMESTEP 9980 / e 9 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -2.214701e-02
TIMESTEP 10026 / e 9 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -2.228423e-02
TIMESTEP 10072 / e 9 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -2.234680e-02
TIMESTEP 10118 / e 9 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -2.237749e-02
TIMESTEP 10164 / e 9 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -2.240090e-02
TIMESTEP 10210 / e 9 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -2.241198e-02
TIMESTEP 10256 / e 9 / bar1_score 0 / bar2_score 19 / REWARD -

TIMESTEP 14120 / e 14 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -3.275235e-02
TIMESTEP 14166 / e 14 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -3.282166e-02
TIMESTEP 14212 / e 14 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -3.286148e-02
TIMESTEP 14258 / e 14 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -3.288040e-02
TIMESTEP 14304 / e 14 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -3.289424e-02
TIMESTEP 14350 / e 14 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -3.297732e-02
TIMESTEP 14396 / e 14 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -3.306624e-02
TIMESTEP 14442 / e 14 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -3.311780e-02
TIMESTEP 14488 / e 14 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -3.314268e-02
TIMESTEP 14534 / e 14 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -3.333593e-02
TIMESTEP 14580 / e 14 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -3.350814e-02
TIMESTEP 14626 / e 14 / bar1_score 0 / bar2_score 

TIMESTEP 18490 / e 18 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -4.583522e-02
TIMESTEP 18536 / e 18 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -4.594273e-02
TIMESTEP 18582 / e 19 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -4.609253e-02
TIMESTEP 18628 / e 19 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -4.620370e-02
TIMESTEP 18674 / e 19 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -4.642197e-02
TIMESTEP 18720 / e 19 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -4.656129e-02
TIMESTEP 18766 / e 19 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -4.676224e-02
TIMESTEP 18812 / e 19 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -4.697235e-02
TIMESTEP 18858 / e 19 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -4.713206e-02
TIMESTEP 18904 / e 19 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -4.728532e-02
TIMESTEP 18950 / e 19 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -4.751297e-02
TIMESTEP 18996 / e 19 / bar1_score 0 / bar2_score 9 

TIMESTEP 22860 / e 23 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -5.896098e-02
TIMESTEP 22906 / e 23 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -5.917897e-02
TIMESTEP 22952 / e 23 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -5.930581e-02
TIMESTEP 22998 / e 23 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -5.939587e-02
TIMESTEP 23044 / e 23 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -5.954746e-02
TIMESTEP 23090 / e 23 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -5.962131e-02
TIMESTEP 23136 / e 23 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -5.980444e-02
TIMESTEP 23182 / e 24 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -6.001850e-02
TIMESTEP 23228 / e 24 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -6.012328e-02
TIMESTEP 23274 / e 24 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -6.020307e-02
TIMESTEP 23320 / e 24 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -6.036870e-02
TIMESTEP 23366 / e 24 / bar1_score 0 / bar2_sco

TIMESTEP 27396 / e 28 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -7.344852e-02
TIMESTEP 27442 / e 28 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -7.367249e-02
TIMESTEP 27488 / e 28 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -7.386551e-02
TIMESTEP 27534 / e 28 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -7.412753e-02
TIMESTEP 27580 / e 28 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -7.428376e-02
TIMESTEP 27626 / e 28 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -7.439227e-02
TIMESTEP 27672 / e 28 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -7.447340e-02
TIMESTEP 27718 / e 28 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -7.450629e-02
TIMESTEP 27764 / e 28 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -7.462988e-02
TIMESTEP 27810 / e 28 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -7.470526e-02
TIMESTEP 27856 / e 28 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -7.494590e-02
TIMESTEP 27902 / e 28 / bar1_score 0 / bar2_sc

TIMESTEP 31766 / e 33 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -8.528635e-02
TIMESTEP 31812 / e 33 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -8.550955e-02
TIMESTEP 31858 / e 33 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -8.581390e-02
TIMESTEP 31904 / e 33 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -8.610675e-02
TIMESTEP 31950 / e 33 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -8.630892e-02
TIMESTEP 31996 / e 33 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -8.652143e-02
TIMESTEP 32042 / e 33 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -8.664551e-02
TIMESTEP 32088 / e 33 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -8.670624e-02
TIMESTEP 32134 / e 33 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -8.675199e-02
TIMESTEP 32180 / e 33 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -8.687970e-02
TIMESTEP 32226 / e 33 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -8.697852e-02
TIMESTEP 32272 / e 33 / bar1_score 0 / bar2_score 1

TIMESTEP 36303 / e 37 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -9.768645e-02
TIMESTEP 36349 / e 37 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -9.771935e-02
TIMESTEP 36395 / e 37 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -9.774227e-02
TIMESTEP 36441 / e 38 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -9.780883e-02
TIMESTEP 36487 / e 38 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -9.791434e-02
TIMESTEP 36533 / e 38 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -9.809834e-02
TIMESTEP 36579 / e 38 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -9.818716e-02
TIMESTEP 36625 / e 38 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -9.825295e-02
TIMESTEP 36671 / e 38 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -9.838783e-02
TIMESTEP 36717 / e 38 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -9.852107e-02
TIMESTEP 36763 / e 38 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -9.874911e-02
TIMESTEP 36809 / e 38 / bar1_score 0 / bar2_score 8

TIMESTEP 40673 / e 42 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -1.110238e-01
TIMESTEP 40719 / e 42 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -1.110976e-01
TIMESTEP 40765 / e 42 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -1.111505e-01
TIMESTEP 40811 / e 42 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -1.111990e-01
TIMESTEP 40857 / e 42 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -1.113084e-01
TIMESTEP 40903 / e 42 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -1.114667e-01
TIMESTEP 40949 / e 42 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -1.115572e-01
TIMESTEP 40995 / e 42 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -1.116242e-01
TIMESTEP 41041 / e 43 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -1.117086e-01
TIMESTEP 41087 / e 43 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.119955e-01
TIMESTEP 41216 / e 43 / bar1_score 1 / bar2_score 1 / REWARD 1 / Q_MAX -1.122395e-01
TIMESTEP 41345 / e 43 / bar1_score 1 / bar2_sco

TIMESTEP 45209 / e 47 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -1.237489e-01
TIMESTEP 45255 / e 47 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -1.239281e-01
TIMESTEP 45301 / e 47 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -1.240147e-01
TIMESTEP 45347 / e 47 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -1.240795e-01
TIMESTEP 45393 / e 47 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -1.242483e-01
TIMESTEP 45439 / e 47 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -1.244552e-01
TIMESTEP 45485 / e 47 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -1.246741e-01
TIMESTEP 45531 / e 47 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -1.246769e-01
TIMESTEP 45577 / e 47 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -1.247508e-01
TIMESTEP 45623 / e 47 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -1.248298e-01
TIMESTEP 45669 / e 47 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -1.248672e-01
TIMESTEP 45715 / e 47 / bar1_score 0 / bar2_sco

TIMESTEP 49579 / e 52 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.340204e-01
TIMESTEP 49625 / e 52 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -1.341211e-01
TIMESTEP 49671 / e 52 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -1.342357e-01
TIMESTEP 49717 / e 52 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -1.344122e-01
TIMESTEP 49763 / e 52 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -1.345118e-01
TIMESTEP 49809 / e 52 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -1.345877e-01
TIMESTEP 49855 / e 52 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -1.348175e-01
TIMESTEP 49901 / e 52 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -1.349641e-01
TIMESTEP 49947 / e 52 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -1.350739e-01
TIMESTEP 49993 / e 52 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -1.351332e-01
TIMESTEP 50039 / e 52 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -1.351590e-01
TIMESTEP 50085 / e 52 / bar1_score 0 / bar2_score 12

TIMESTEP 54118 / e 56 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -1.455276e-01
TIMESTEP 54164 / e 56 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -1.455164e-01
TIMESTEP 54210 / e 56 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -1.455687e-01
TIMESTEP 54256 / e 56 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -1.456653e-01
TIMESTEP 54302 / e 57 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -1.457185e-01
TIMESTEP 54348 / e 57 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.457421e-01
TIMESTEP 54394 / e 57 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -1.457551e-01
TIMESTEP 54440 / e 57 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -1.457577e-01
TIMESTEP 54486 / e 57 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -1.458154e-01
TIMESTEP 54532 / e 57 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -1.460028e-01
TIMESTEP 54578 / e 57 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -1.462398e-01
TIMESTEP 54624 / e 57 / bar1_score 0 / bar2_score 

TIMESTEP 58488 / e 61 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -1.573815e-01
TIMESTEP 58534 / e 61 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -1.573875e-01
TIMESTEP 58580 / e 61 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -1.574671e-01
TIMESTEP 58626 / e 61 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -1.575504e-01
TIMESTEP 58672 / e 61 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -1.576735e-01
TIMESTEP 58718 / e 61 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -1.577652e-01
TIMESTEP 58764 / e 61 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -1.579120e-01
TIMESTEP 58810 / e 61 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -1.580150e-01
TIMESTEP 58856 / e 61 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -1.580906e-01
TIMESTEP 58902 / e 62 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -1.581308e-01
TIMESTEP 58948 / e 62 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.581761e-01
TIMESTEP 58994 / e 62 / bar1_score 0 / bar2_s

TIMESTEP 63027 / e 66 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -1.724934e-01
TIMESTEP 63073 / e 66 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -1.726412e-01
TIMESTEP 63119 / e 66 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -1.727635e-01
TIMESTEP 63165 / e 66 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -1.729616e-01
TIMESTEP 63211 / e 66 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -1.732870e-01
TIMESTEP 63257 / e 66 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -1.735300e-01
TIMESTEP 63303 / e 66 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -1.736445e-01
TIMESTEP 63349 / e 66 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -1.737708e-01
TIMESTEP 63395 / e 66 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -1.738647e-01
TIMESTEP 63441 / e 66 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -1.740998e-01
TIMESTEP 63487 / e 66 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -1.742713e-01
TIMESTEP 63533 / e 66 / bar1_score 0 / bar2_scor

TIMESTEP 67397 / e 71 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -1.825426e-01
TIMESTEP 67443 / e 71 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.827172e-01
TIMESTEP 67489 / e 71 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -1.828762e-01
TIMESTEP 67535 / e 71 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -1.830184e-01
TIMESTEP 67581 / e 71 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -1.830834e-01
TIMESTEP 67627 / e 71 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -1.831264e-01
TIMESTEP 67673 / e 71 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -1.831448e-01
TIMESTEP 67719 / e 71 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -1.831488e-01
TIMESTEP 67765 / e 71 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -1.831729e-01
TIMESTEP 67811 / e 71 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -1.832563e-01
TIMESTEP 67857 / e 71 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -1.833153e-01
TIMESTEP 67903 / e 71 / bar1_score 0 / bar2_score 11 

TIMESTEP 71767 / e 75 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -1.952467e-01
TIMESTEP 71813 / e 75 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -1.953072e-01
TIMESTEP 71859 / e 75 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -1.953623e-01
TIMESTEP 71905 / e 75 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -1.956053e-01
TIMESTEP 71951 / e 75 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -1.957216e-01
TIMESTEP 71997 / e 76 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -1.958965e-01
TIMESTEP 72043 / e 76 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -1.960337e-01
TIMESTEP 72089 / e 76 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -1.960902e-01
TIMESTEP 72135 / e 76 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -1.961577e-01
TIMESTEP 72181 / e 76 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -1.962651e-01
TIMESTEP 72227 / e 76 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -1.963435e-01
TIMESTEP 72273 / e 76 / bar1_score 0 / bar2_score

TIMESTEP 76137 / e 80 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -2.066461e-01
TIMESTEP 76183 / e 80 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -2.067664e-01
TIMESTEP 76229 / e 80 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -2.068228e-01
TIMESTEP 76275 / e 80 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -2.069857e-01
TIMESTEP 76321 / e 80 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -2.070623e-01
TIMESTEP 76367 / e 80 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -2.071168e-01
TIMESTEP 76413 / e 80 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -2.072464e-01
TIMESTEP 76459 / e 80 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -2.074256e-01
TIMESTEP 76505 / e 80 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -2.075587e-01
TIMESTEP 76551 / e 80 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -2.076203e-01
TIMESTEP 76597 / e 81 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -2.077874e-01
TIMESTEP 76643 / e 81 / bar1_score 0 / bar2_

TIMESTEP 80507 / e 85 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -2.191393e-01
TIMESTEP 80553 / e 85 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -2.193004e-01
TIMESTEP 80599 / e 85 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -2.193751e-01
TIMESTEP 80645 / e 85 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -2.194279e-01
TIMESTEP 80691 / e 85 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -2.194490e-01
TIMESTEP 80737 / e 85 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -2.194605e-01
TIMESTEP 80783 / e 85 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -2.195442e-01
TIMESTEP 80829 / e 85 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -2.197078e-01
TIMESTEP 80875 / e 85 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -2.198283e-01
TIMESTEP 80921 / e 85 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -2.198845e-01
TIMESTEP 80967 / e 85 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -2.199765e-01
TIMESTEP 81013 / e 85 / bar1_score 0 / bar2_scor

TIMESTEP 84877 / e 90 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -2.286810e-01
TIMESTEP 84923 / e 90 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -2.287559e-01
TIMESTEP 84969 / e 90 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -2.287874e-01
TIMESTEP 85015 / e 90 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -2.288054e-01
TIMESTEP 85061 / e 90 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -2.288086e-01
TIMESTEP 85107 / e 90 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -2.288038e-01
TIMESTEP 85153 / e 90 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -2.287923e-01
TIMESTEP 85199 / e 90 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -2.287794e-01
TIMESTEP 85245 / e 90 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -2.287633e-01
TIMESTEP 85291 / e 90 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -2.287497e-01
TIMESTEP 85337 / e 90 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -2.288185e-01
TIMESTEP 85383 / e 90 / bar1_score 0 / bar2_score 11 

TIMESTEP 89414 / e 94 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -2.401922e-01
TIMESTEP 89460 / e 94 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -2.401835e-01
TIMESTEP 89506 / e 94 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -2.402998e-01
TIMESTEP 89552 / e 94 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -2.403611e-01
TIMESTEP 89598 / e 94 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -2.404447e-01
TIMESTEP 89644 / e 95 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -2.405615e-01
TIMESTEP 89690 / e 95 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -2.407314e-01
TIMESTEP 89736 / e 95 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -2.409778e-01
TIMESTEP 89782 / e 95 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -2.411834e-01
TIMESTEP 89828 / e 95 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -2.412367e-01
TIMESTEP 89874 / e 95 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -2.412719e-01
TIMESTEP 89920 / e 95 / bar1_score 0 / bar2_score

TIMESTEP 94118 / e 99 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -2.517328e-01
TIMESTEP 94164 / e 99 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -2.517191e-01
TIMESTEP 94210 / e 99 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -2.517056e-01
TIMESTEP 94256 / e 99 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -2.516909e-01
TIMESTEP 94302 / e 99 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -2.516758e-01
TIMESTEP 94348 / e 99 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -2.516614e-01
TIMESTEP 94394 / e 99 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -2.516432e-01
TIMESTEP 94440 / e 99 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -2.516279e-01
TIMESTEP 94486 / e 99 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -2.516069e-01
TIMESTEP 94532 / e 99 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -2.515868e-01
TIMESTEP 94578 / e 99 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -2.515712e-01
TIMESTEP 94624 / e 100 / bar1_score 0 / bar2

TIMESTEP 98442 / e 104 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -2.594828e-01
TIMESTEP 98488 / e 104 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -2.595039e-01
TIMESTEP 98534 / e 104 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -2.597097e-01
TIMESTEP 98580 / e 104 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -2.599555e-01
TIMESTEP 98626 / e 104 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -2.601408e-01
TIMESTEP 98672 / e 104 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -2.602422e-01
TIMESTEP 98718 / e 104 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -2.603911e-01
TIMESTEP 98764 / e 104 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -2.606246e-01
TIMESTEP 98810 / e 104 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -2.607332e-01
TIMESTEP 98856 / e 104 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -2.608095e-01
TIMESTEP 98902 / e 104 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -2.609520e-01
TIMESTEP 98948 / e 104 / bar1_score 0 /

TIMESTEP 102720 / e 108 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -2.694147e-01
TIMESTEP 102766 / e 108 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -2.698137e-01
TIMESTEP 102812 / e 108 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -2.700615e-01
TIMESTEP 102858 / e 108 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -2.701767e-01
TIMESTEP 102904 / e 109 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -2.702575e-01
TIMESTEP 102950 / e 109 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -2.703240e-01
TIMESTEP 102996 / e 109 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -2.704633e-01
TIMESTEP 103042 / e 109 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -2.705696e-01
TIMESTEP 103088 / e 109 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -2.707304e-01
TIMESTEP 103134 / e 109 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -2.709413e-01
TIMESTEP 103180 / e 109 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -2.710371e-01
TIMESTEP 103226 / e 109 / ba

TIMESTEP 106998 / e 113 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -2.799165e-01
TIMESTEP 107044 / e 113 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -2.800215e-01
TIMESTEP 107090 / e 113 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -2.800371e-01
TIMESTEP 107136 / e 113 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -2.800433e-01
TIMESTEP 107182 / e 113 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -2.800420e-01
TIMESTEP 107228 / e 113 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -2.800357e-01
TIMESTEP 107274 / e 113 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -2.801713e-01
TIMESTEP 107320 / e 113 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -2.803276e-01
TIMESTEP 107366 / e 113 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -2.804453e-01
TIMESTEP 107412 / e 113 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -2.805077e-01
TIMESTEP 107458 / e 113 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -2.805333e-01
TIMESTEP 107504 / e 11

TIMESTEP 111445 / e 118 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -2.914479e-01
TIMESTEP 111491 / e 118 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -2.914783e-01
TIMESTEP 111537 / e 118 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -2.915734e-01
TIMESTEP 111583 / e 118 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -2.916606e-01
TIMESTEP 111629 / e 118 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -2.916986e-01
TIMESTEP 111675 / e 118 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -2.917496e-01
TIMESTEP 111721 / e 118 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -2.918456e-01
TIMESTEP 111767 / e 118 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -2.920698e-01
TIMESTEP 111813 / e 118 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -2.923037e-01
TIMESTEP 111859 / e 118 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -2.924906e-01
TIMESTEP 111905 / e 118 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -2.926302e-01
TIMESTEP 111951 / e 118 / bar1

TIMESTEP 115723 / e 122 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -3.003583e-01
TIMESTEP 115769 / e 122 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.003983e-01
TIMESTEP 115815 / e 122 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -3.004259e-01
TIMESTEP 115861 / e 122 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -3.005273e-01
TIMESTEP 115907 / e 122 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -3.006932e-01
TIMESTEP 115953 / e 122 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -3.008564e-01
TIMESTEP 115999 / e 123 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -3.009287e-01
TIMESTEP 116045 / e 123 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.010929e-01
TIMESTEP 116091 / e 123 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -3.011883e-01
TIMESTEP 116137 / e 123 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -3.013187e-01
TIMESTEP 116183 / e 123 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -3.014608e-01
TIMESTEP 116229 / e 123 / 

TIMESTEP 120001 / e 127 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -3.089357e-01
TIMESTEP 120047 / e 127 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -3.091120e-01
TIMESTEP 120093 / e 127 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -3.093116e-01
TIMESTEP 120139 / e 127 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -3.094001e-01
TIMESTEP 120185 / e 127 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -3.094549e-01
TIMESTEP 120231 / e 127 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -3.095914e-01
TIMESTEP 120277 / e 127 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -3.098819e-01
TIMESTEP 120323 / e 127 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -3.100739e-01
TIMESTEP 120369 / e 127 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.100598e-01
TIMESTEP 120415 / e 127 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -3.100412e-01
TIMESTEP 120461 / e 127 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -3.100264e-01
TIMESTEP 120507 / e 127 

TIMESTEP 124279 / e 132 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -3.190754e-01
TIMESTEP 124325 / e 132 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.192636e-01
TIMESTEP 124371 / e 132 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -3.193478e-01
TIMESTEP 124417 / e 132 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -3.194024e-01
TIMESTEP 124463 / e 132 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -3.195488e-01
TIMESTEP 124509 / e 132 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -3.196087e-01
TIMESTEP 124555 / e 132 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -3.196402e-01
TIMESTEP 124601 / e 132 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -3.196771e-01
TIMESTEP 124647 / e 132 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -3.198950e-01
TIMESTEP 124693 / e 132 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -3.200655e-01
TIMESTEP 124739 / e 132 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -3.202216e-01
TIMESTEP 124785 / e 132 / bar1_

TIMESTEP 128557 / e 136 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -3.261599e-01
TIMESTEP 128603 / e 136 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -3.262022e-01
TIMESTEP 128649 / e 136 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.262790e-01
TIMESTEP 128695 / e 136 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -3.263282e-01
TIMESTEP 128741 / e 136 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -3.263440e-01
TIMESTEP 128787 / e 136 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -3.263442e-01
TIMESTEP 128833 / e 136 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -3.264872e-01
TIMESTEP 128879 / e 137 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -3.266611e-01
TIMESTEP 128925 / e 137 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.267877e-01
TIMESTEP 128971 / e 137 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -3.268406e-01
TIMESTEP 129017 / e 137 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -3.269046e-01
TIMESTEP 129063 / e 137 /

TIMESTEP 132835 / e 141 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -3.376803e-01
TIMESTEP 132881 / e 141 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -3.377563e-01
TIMESTEP 132927 / e 141 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -3.378054e-01
TIMESTEP 132973 / e 141 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -3.379565e-01
TIMESTEP 133019 / e 141 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -3.381108e-01
TIMESTEP 133065 / e 141 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -3.382487e-01
TIMESTEP 133111 / e 141 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -3.383103e-01
TIMESTEP 133157 / e 141 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -3.383519e-01
TIMESTEP 133203 / e 141 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -3.385013e-01
TIMESTEP 133249 / e 141 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.385677e-01
TIMESTEP 133295 / e 141 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -3.386102e-01
TIMESTEP 133341 / e 141 /

TIMESTEP 137113 / e 145 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -3.494895e-01
TIMESTEP 137159 / e 146 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -3.496022e-01
TIMESTEP 137205 / e 146 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.496803e-01
TIMESTEP 137251 / e 146 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -3.497101e-01
TIMESTEP 137297 / e 146 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -3.497244e-01
TIMESTEP 137343 / e 146 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -3.498480e-01
TIMESTEP 137389 / e 146 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -3.499006e-01
TIMESTEP 137435 / e 146 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -3.499329e-01
TIMESTEP 137481 / e 146 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -3.500464e-01
TIMESTEP 137527 / e 146 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -3.501289e-01
TIMESTEP 137573 / e 146 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -3.501665e-01
TIMESTEP 137619 / e 146 / bar1_

TIMESTEP 141391 / e 150 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -3.594895e-01
TIMESTEP 141437 / e 150 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -3.597384e-01
TIMESTEP 141483 / e 150 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -3.598774e-01
TIMESTEP 141529 / e 150 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.599395e-01
TIMESTEP 141575 / e 150 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -3.599752e-01
TIMESTEP 141621 / e 150 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -3.599855e-01
TIMESTEP 141667 / e 150 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -3.600417e-01
TIMESTEP 141713 / e 150 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -3.603062e-01
TIMESTEP 141759 / e 151 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -3.605632e-01
TIMESTEP 141805 / e 151 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.608806e-01
TIMESTEP 141851 / e 151 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -3.610462e-01
TIMESTEP 141897 / e 151 

TIMESTEP 145669 / e 155 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -3.700164e-01
TIMESTEP 145715 / e 155 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -3.700977e-01
TIMESTEP 145761 / e 155 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -3.701540e-01
TIMESTEP 145807 / e 155 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -3.702708e-01
TIMESTEP 145853 / e 155 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -3.703718e-01
TIMESTEP 145899 / e 155 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -3.705156e-01
TIMESTEP 145945 / e 155 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -3.706158e-01
TIMESTEP 145991 / e 155 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -3.707387e-01
TIMESTEP 146037 / e 155 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -3.710027e-01
TIMESTEP 146083 / e 155 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -3.709998e-01
TIMESTEP 146129 / e 155 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.710702e-01
TIMESTEP 146175 / e 155 / 

TIMESTEP 149947 / e 159 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -3.804589e-01
TIMESTEP 149993 / e 159 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -3.805808e-01
TIMESTEP 150039 / e 160 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -3.806612e-01
TIMESTEP 150085 / e 160 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.808484e-01
TIMESTEP 150131 / e 160 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -3.809511e-01
TIMESTEP 150177 / e 160 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -3.811239e-01
TIMESTEP 150223 / e 160 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -3.814197e-01
TIMESTEP 150269 / e 160 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -3.816078e-01
TIMESTEP 150315 / e 160 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -3.817433e-01
TIMESTEP 150361 / e 160 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -3.817997e-01
TIMESTEP 150407 / e 160 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -3.818316e-01
TIMESTEP 150453 / e 160 / bar1

TIMESTEP 154225 / e 164 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -3.884257e-01
TIMESTEP 154271 / e 164 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -3.885414e-01
TIMESTEP 154317 / e 164 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -3.886229e-01
TIMESTEP 154363 / e 164 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -3.887883e-01
TIMESTEP 154409 / e 164 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -3.888607e-01
TIMESTEP 154455 / e 164 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -3.889051e-01
TIMESTEP 154501 / e 164 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -3.889190e-01
TIMESTEP 154547 / e 164 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -3.889793e-01
TIMESTEP 154593 / e 164 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -3.891571e-01
TIMESTEP 154639 / e 165 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -3.893332e-01
TIMESTEP 154685 / e 165 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -3.896481e-01
TIMESTEP 154731 / e 165

TIMESTEP 158503 / e 169 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -3.969798e-01
TIMESTEP 158549 / e 169 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -3.971342e-01
TIMESTEP 158595 / e 169 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -3.972474e-01
TIMESTEP 158641 / e 169 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -3.972985e-01
TIMESTEP 158687 / e 169 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -3.973329e-01
TIMESTEP 158733 / e 169 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -3.973466e-01
TIMESTEP 158779 / e 169 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -3.973479e-01
TIMESTEP 158825 / e 169 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -3.973396e-01
TIMESTEP 158871 / e 169 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -3.973291e-01
TIMESTEP 158917 / e 169 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -3.973132e-01
TIMESTEP 158963 / e 169 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -3.972928e-01
TIMESTEP 159009 / e 169 / b

TIMESTEP 162781 / e 173 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -4.056642e-01
TIMESTEP 162827 / e 173 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -4.058075e-01
TIMESTEP 162873 / e 173 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -4.058771e-01
TIMESTEP 162919 / e 174 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -4.059016e-01
TIMESTEP 162965 / e 174 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -4.059091e-01
TIMESTEP 163011 / e 174 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -4.059043e-01
TIMESTEP 163057 / e 174 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -4.059178e-01
TIMESTEP 163103 / e 174 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -4.060606e-01
TIMESTEP 163149 / e 174 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -4.062308e-01
TIMESTEP 163195 / e 174 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -4.064363e-01
TIMESTEP 163241 / e 174 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -4.065856e-01
TIMESTEP 163287 / e 174 / bar

TIMESTEP 167059 / e 178 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -4.192810e-01
TIMESTEP 167105 / e 178 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -4.192770e-01
TIMESTEP 167151 / e 178 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -4.194317e-01
TIMESTEP 167280 / e 178 / bar1_score 1 / bar2_score 12 / REWARD 1 / Q_MAX -4.196909e-01
TIMESTEP 167409 / e 178 / bar1_score 1 / bar2_score 13 / REWARD -1 / Q_MAX -4.199358e-01
TIMESTEP 167455 / e 178 / bar1_score 1 / bar2_score 14 / REWARD -1 / Q_MAX -4.200493e-01
TIMESTEP 167501 / e 178 / bar1_score 1 / bar2_score 15 / REWARD -1 / Q_MAX -4.201335e-01
TIMESTEP 167547 / e 178 / bar1_score 1 / bar2_score 16 / REWARD -1 / Q_MAX -4.201922e-01
TIMESTEP 167593 / e 178 / bar1_score 1 / bar2_score 17 / REWARD -1 / Q_MAX -4.202187e-01
TIMESTEP 167639 / e 178 / bar1_score 1 / bar2_score 18 / REWARD -1 / Q_MAX -4.202555e-01
TIMESTEP 167685 / e 178 / bar1_score 1 / bar2_score 19 / REWARD -1 / Q_MAX -4.204678e-01
TIMESTEP 167731 / e 17

TIMESTEP 171503 / e 183 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -4.262386e-01
TIMESTEP 171549 / e 183 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -4.262479e-01
TIMESTEP 171595 / e 183 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -4.262474e-01
TIMESTEP 171641 / e 183 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -4.263225e-01
TIMESTEP 171687 / e 183 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -4.264025e-01
TIMESTEP 171733 / e 183 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -4.265243e-01
TIMESTEP 171779 / e 183 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -4.267141e-01
TIMESTEP 171825 / e 183 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -4.268548e-01
TIMESTEP 171871 / e 183 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -4.269187e-01
TIMESTEP 171917 / e 183 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -4.270639e-01
TIMESTEP 171963 / e 183 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -4.271592e-01
TIMESTEP 172009 / e 183 / bar

TIMESTEP 175781 / e 187 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -4.351477e-01
TIMESTEP 175827 / e 187 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -4.352410e-01
TIMESTEP 175873 / e 187 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -4.352851e-01
TIMESTEP 175919 / e 187 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -4.353004e-01
TIMESTEP 175965 / e 187 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -4.355060e-01
TIMESTEP 176011 / e 188 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -4.356629e-01
TIMESTEP 176057 / e 188 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -4.357753e-01
TIMESTEP 176103 / e 188 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -4.358310e-01
TIMESTEP 176149 / e 188 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -4.358517e-01
TIMESTEP 176195 / e 188 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -4.358587e-01
TIMESTEP 176241 / e 188 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -4.358547e-01
TIMESTEP 176287 / e 188 / b

TIMESTEP 180059 / e 192 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -4.421632e-01
TIMESTEP 180105 / e 192 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -4.421868e-01
TIMESTEP 180151 / e 192 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -4.421951e-01
TIMESTEP 180197 / e 192 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -4.421965e-01
TIMESTEP 180243 / e 192 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -4.422847e-01
TIMESTEP 180289 / e 192 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -4.423649e-01
TIMESTEP 180335 / e 192 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -4.424241e-01
TIMESTEP 180381 / e 192 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -4.424467e-01
TIMESTEP 180427 / e 192 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -4.424545e-01
TIMESTEP 180473 / e 192 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -4.424540e-01
TIMESTEP 180519 / e 192 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -4.424514e-01
TIMESTEP 180565 / e 192

TIMESTEP 184337 / e 197 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -4.486327e-01
TIMESTEP 184383 / e 197 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -4.487705e-01
TIMESTEP 184429 / e 197 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -4.488313e-01
TIMESTEP 184475 / e 197 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -4.491178e-01
TIMESTEP 184521 / e 197 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX -4.493278e-01
TIMESTEP 184567 / e 197 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX -4.495818e-01
TIMESTEP 184613 / e 197 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -4.497226e-01
TIMESTEP 184659 / e 197 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -4.497831e-01
TIMESTEP 184705 / e 197 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -4.498219e-01
TIMESTEP 184751 / e 197 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -4.499361e-01
TIMESTEP 184797 / e 197 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -4.501837e-01
TIMESTEP 184843 / e 197 / bar1

TIMESTEP 188615 / e 201 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -4.604494e-01
TIMESTEP 188661 / e 201 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -4.605439e-01
TIMESTEP 188707 / e 201 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -4.606043e-01
TIMESTEP 188753 / e 201 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -4.606225e-01
TIMESTEP 188799 / e 201 / bar1_score 0 / bar2_score 18 / REWARD -1 / Q_MAX -4.606131e-01
TIMESTEP 188845 / e 201 / bar1_score 0 / bar2_score 19 / REWARD -1 / Q_MAX -4.605815e-01
TIMESTEP 188891 / e 202 / bar1_score 0 / bar2_score 0 / REWARD -1 / Q_MAX -4.606689e-01
TIMESTEP 188937 / e 202 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX -4.607155e-01
TIMESTEP 188983 / e 202 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX -4.607871e-01
TIMESTEP 189029 / e 202 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX -4.609330e-01
TIMESTEP 189075 / e 202 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX -4.612086e-01
TIMESTEP 189121 / e 202 / 

TIMESTEP 192893 / e 206 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX -4.713142e-01
TIMESTEP 192939 / e 206 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX -4.715119e-01
TIMESTEP 192985 / e 206 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX -4.716530e-01
TIMESTEP 193031 / e 206 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX -4.717150e-01
TIMESTEP 193077 / e 206 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MAX -4.717484e-01
TIMESTEP 193123 / e 206 / bar1_score 0 / bar2_score 12 / REWARD -1 / Q_MAX -4.717534e-01
TIMESTEP 193169 / e 206 / bar1_score 0 / bar2_score 13 / REWARD -1 / Q_MAX -4.717432e-01
TIMESTEP 193215 / e 206 / bar1_score 0 / bar2_score 14 / REWARD -1 / Q_MAX -4.718433e-01
TIMESTEP 193261 / e 206 / bar1_score 0 / bar2_score 15 / REWARD -1 / Q_MAX -4.719108e-01
TIMESTEP 193307 / e 206 / bar1_score 0 / bar2_score 16 / REWARD -1 / Q_MAX -4.720403e-01
TIMESTEP 193353 / e 206 / bar1_score 0 / bar2_score 17 / REWARD -1 / Q_MAX -4.723838e-01
TIMESTEP 193399 / e 206 

In [None]:
# JAI GURUDEV

# Back up code

In [12]:
# Clear the default graph, then build the network through createNetwork().
tf.reset_default_graph()

# createNetwork() returns seven handles; unpack them one per line.
(
    s,
    readout,
    bs,
    r_conv_caps1,
    r_conv_caps2,
    r_class_caps,
    coord_add_op_class_caps,
) = createNetwork()

(?, 12, 12, 32)
12
(?, 12, 12, 8, 16)
(?, 12, 12, 8, 1)
(?, 12, 12, 136)
(?, 5, 5, 136, 9) fdsggs
5
1 (?, 72, 17)
output shape --------------- (?, 72, 17)
activation shape---------------------- (?, 72, 1)
w (1, 72, 16, 4, 4)
w (?, 72, 16, 4, 4)
(?, 72, 16, 16) votes shape
Tensor("conv_caps1/v/tile___3/Reshape:0", shape=(?, 72, 16, 16), dtype=float32)  = votes
(?, 72, 16) r shape__________
(?, 72, 16, 1) r1
(?, 72, 16) ap
(?, 72, 16, 1) r1
(?, 16, 1) r_sum
(?, 16, 16) cost_h
activation (?, 16)
3 (?, 5, 5, 16, 16)
activation (?, 5, 5, 16, 1)
272
5 (?, 5, 5, 272)
(?, 3, 3, 272, 9) fdsggs
canv_caps2 (?, 144, 17) 3
canv_caps2_activation (?, 144, 1) 3
w (1, 144, 16, 4, 4)
w (?, 144, 16, 4, 4)
(?, 144, 16, 16) votes shape
144 for 1
Tensor("conv_caps2/v/tile___3/Reshape:0", shape=(?, 144, 16, 16), dtype=float32)  = votes
(?, 144, 16) r shape__________
(?, 144, 16, 1) r1
(?, 144, 16) ap
(?, 144, 16, 1) r1
(?, 16, 1) r_sum
(?, 16, 16) cost_h
4 (?, 16, 16)
4 ---activation (?, 16, 1)
w (1, 16, 10,

In [63]:
# Clear the default graph before declaring the inputs below.
tf.reset_default_graph()

# ---- Graph inputs --------------------------------------------------------------------
# Leading dimensions are None so the batch size is chosen at feed time; the earlier
# fixed-size variant of these declarations used batch_size = 50.
X = tf.placeholder(dtype=tf.float32, shape=(None, 80, 80, 1), name='X')  # 80x80, one channel
Y = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='Y')

# Scalar batch size, supplied at feed time.
bs = tf.placeholder(dtype=tf.int32, shape=(), name='bs')

# One placeholder per capsule layer, passed to em_routing further down this cell.
# Author's notes give the expected leading dimension as 5*5*batch_size for conv_caps1
# and 3*3*batch_size for the remaining ones.
r_conv_caps1 = tf.placeholder(
    dtype=tf.float32,
    shape=[None, 72, C],
    name='r_conv_caps1',
)
r_conv_caps2 = tf.placeholder(
    dtype=tf.float32,
    shape=[None, 144, D],
    name='r_conv_caps2',
)
r_class_caps = tf.placeholder(
    dtype=tf.float32,
    shape=[None, 16, num_classes],
    name='r_class_caps',
)

# Two extra values per vote; concatenated onto the class-capsule votes further down.
coord_add_op_class_caps = tf.placeholder(
    dtype=tf.float32,
    shape=[None, 16, num_classes, 2],
    name='coord_add_op_class_caps',
)

# ---- Build-time bookkeeping ----------------------------------------------------------
# Spatial size of the input (80); updated by hand after every layer below.
data_size = int(X.get_shape().as_list()[1])

# Truncated-normal weight initializer (not Xavier). The arg_scope below keeps its
# weights_initializer override commented out, so this object is not applied there.
initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)

# Biases are drawn from a truncated normal instead of a constant 0; the author's note
# says this is for higher stability.
bias_initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)

# The paper does not mention regularization; a common L2 penalty on weights is added here.
weights_regularizer = tf.contrib.layers.l2_regularizer(scale=5e-04)
# Build the capsule stack (ReLU conv -> primary caps -> conv_caps1 -> conv_caps2 -> class caps)
# followed by a linear read-out with ACTIONS outputs. The print() calls trace static shapes
# while the graph is being constructed; `data_size` is updated by hand after each layer.
#
# Shared slim.conv2d defaults for every conv below. The weights_initializer override is
# left disabled, so the `initializer` object created earlier in this cell is not applied:
# weights_initializer=initializer,
with slim.arg_scope([slim.conv2d], trainable=is_train, biases_initializer=bias_initializer, weights_regularizer=weights_regularizer):
    with tf.variable_scope('relu_conv1'):
        # 10x10 kernel, stride 6, VALID padding, A output channels.
        output = slim.conv2d(X, num_outputs=A, kernel_size=[10, 10], stride=6, padding='VALID', scope='relu_conv1', activation_fn=tf.nn.relu)
        # Spatial size after the conv: floor((80 - 10) / 6) + 1 = 12.
        data_size = int(np.floor((data_size - 10) / 6)) + 1
        print(output.get_shape())  # prints (?, 12, 12, 32)
        print(data_size)
        #assert output.get_shape() == [batch_size, data_size, data_size, 32]
        # Extra handle on the conv features; not referenced again in this cell.
        votes__1 = output
    with tf.variable_scope('primary_caps'):
        # Two 1x1 convs on the same features: B*16 pose values (no activation function)
        # and B sigmoid activations per spatial position.
        pose = slim.conv2d(output, num_outputs=B * 16,kernel_size=[1, 1], stride=1, padding='VALID', scope='primary_caps', activation_fn=None)
        activation = slim.conv2d(output, num_outputs=B, kernel_size=[
                                 1, 1], stride=1, padding='VALID', scope='primary_caps/activation', activation_fn=tf.nn.sigmoid)
        pose = tf.reshape(pose, shape=[-1, data_size, data_size, B, 16]) # (batch, 12, 12, 8, 16)
        
        print(pose.get_shape())
        activation = tf.reshape(activation, shape=[-1, data_size, data_size, B, 1]) # (batch, 12, 12, 8, 1)
        print(activation.get_shape())
        # Append the activation as a 17th value after each capsule's 16 pose values,
        # then flatten the capsule axis back into channels (B * 17 = 136).
        output = tf.concat([pose, activation], axis=4)
        output = tf.reshape(output, shape=[-1, data_size, data_size, B * 17]) # (batch, 12, 12, 136)
        print(output.get_shape())
        
        #assert output.get_shape() == [batch_size, data_size, data_size, B * 17]
    with tf.variable_scope('conv_caps1') as scope:
        # kernel_tile is defined elsewhere in the notebook; the arguments (3, 2) are
        # presumably kernel size and stride. Its own debug print reports (?, 5, 5, 136, 9).
        # NOTE(review): if (?, 5, 5, 136, 9) is the layout actually returned, the reshape to
        # [-1, 3 * 3 * B, 17] below would mix kernel positions into the 17 capsule values --
        # confirm that kernel_tile transposes to (..., 9, 136) before returning.
        output = kernel_tile(output, 3, 2)
        # Spatial size after tiling: floor((12 - 2) / 2) = 5.
        data_size = int(np.floor((data_size - 2) / 2))
        print(data_size) # 5 
        output = tf.reshape(output, shape=[-1, 3 * 3 * B, 17]) 
        # leading dimension is batch_size * data_size * data_size; printed as (?, 72, 17)
        print("1",output.get_shape())
        # Index 16 is the activation appended after the 16 pose values.
        activation = tf.reshape(output[:, :, 16], shape=[-1, 3 * 3 * B, 1])
        print("output shape ---------------",output.get_shape())
        print("activation shape----------------------",activation.get_shape()) #  (?, 72, 1)
        
        with tf.variable_scope('v') as scope:
            # mat_transform is defined elsewhere; it receives the 16 pose values, the number
            # of output capsules C, and the effective batch size bs * 5 * 5 as a tensor.
            votes = mat_transform(output[:, :, :16], C, weights_regularizer, bs = bs*data_size*data_size)
            #bs*data_size*data_size)
            
            print(votes.get_shape(),"votes shape")  # prints (?, 72, 16, 16)
        with tf.variable_scope('routing') as scope:
            # Computed but not used anywhere else in this cell.
            caps_num_i = int(activation.get_shape()[1])
            
            # em_routing is defined elsewhere; r_conv_caps1 is fed from outside
            # (presumably the initial routing assignments -- confirm against em_routing).
            # The third return value is discarded.
            miu, activation, _ = em_routing(votes, activation, C, weights_regularizer,r_conv_caps1)
            # miu, activation, _ = em_routing(votes, activation, C, weights_regularizer)
            print("activation",activation.get_shape())
        # Restore the spatial layout and re-pack pose + activation as 17 values per capsule.
        pose = tf.reshape(miu, shape=[-1, data_size, data_size, C, 16])
        print("3",pose.get_shape()) # (batch, 5, 5, 16, 16)
        activation = tf.reshape(activation, shape=[-1, data_size, data_size, C, 1])
        print("activation",activation.get_shape())
        # C * 1 + C * 16 channels; printed as 272.
        cat_size =  activation.get_shape()[3]*activation.get_shape()[4] + pose.get_shape()[3] *pose.get_shape()[4]
        print(cat_size)
        output = tf.reshape(tf.concat([pose, activation], axis=4),[-1, data_size, data_size, cat_size])
        print("5",output.get_shape()) # (batch, 5, 5, 272)
        
        
    with tf.variable_scope('conv_caps2') as scope:
        # Same pattern as conv_caps1, called with (3, 1); the helper prints (?, 3, 3, 272, 9).
        output = kernel_tile(output, 3, 1)
        
        # Spatial size after tiling: floor((5 - 2) / 1) = 3.
        data_size = int(np.floor((data_size - 2) / 1))
        
        output = tf.reshape(output, shape=[-1, 3 * 3 * C, 17]) # batch_size * data_size * data_size
        print("canv_caps2",output.get_shape(), data_size)
        activation = tf.reshape(output[:, :, 16], shape=[-1 , 3 * 3 * C, 1]) # batch_size * data_size * data_size
        print("canv_caps2_activation",activation.get_shape(), data_size)
        
        with tf.variable_scope('v') as scope:
            # Effective batch size is now bs * 3 * 3.
            votes = mat_transform(output[:, :, :16], D, weights_regularizer,bs = bs*data_size*data_size)
            print(votes.get_shape(),"votes shape")
            
        with tf.variable_scope('routing') as scope:
            # Only used by the debug print on the next line.
            caps_num_i = int(activation.get_shape()[1])
            print(caps_num_i,"for 1")
            miu, activation, _ = em_routing(votes, activation, D, weights_regularizer, r_conv_caps2)

        # Unlike conv_caps1, the spatial positions stay folded into the leading dimension.
        pose = tf.reshape(miu, shape=[-1, D, 16]) # batch_size * data_size * data_size
        print("4",pose.get_shape())
        #tf.logging.info('conv cap 2 pose shape: {}'.format(votes.get_shape()))
        activation = tf.reshape(activation, shape=[-1, D, 1]) # batch_size * data_size * data_size
        print("4 ---activation",activation.get_shape())
        
    with tf.variable_scope('class_caps') as scope:
        with tf.variable_scope('v') as scope:
            votes = mat_transform(pose, num_classes, weights_regularizer,bs = bs*data_size*data_size)
            print(votes.get_shape(),"votes.getshape")
            # Static-shape check on everything except the (unknown) leading dimension.
            assert votes.get_shape()[1:] == [D, num_classes, 16]
            #tf.logging.info('class cap votes original shape: {}'.format(votes.get_shape()))
            # The string literal below is disabled code kept for reference: it built the
            # coordinate-addition tensor in numpy. That tensor is now fed through the
            # coord_add_op_class_caps placeholder instead.
            '''coord_add = get_coord_add('mnist') 
            coord_add = np.reshape(coord_add, newshape=[data_size * data_size, 1, 1, 2])
            coord_add = np.tile(coord_add, [bs, D, num_classes, 1])
            coord_add_op = tf.constant(coord_add, dtype=tf.float32)
            print("___coord_add______",coord_add_op.shape)'''
            
            # Prepend the 2 coordinate values to each 16-value vote -> 18 values per vote.
            votes = tf.concat([coord_add_op_class_caps, votes], axis=3)
            #tf.logging.info('class cap votes coord add shape: {}'.format(votes.get_shape()))
            print(votes.get_shape(),"coorr vote shape after  jnbfv")
        with tf.variable_scope('routing') as scope:
            # Only used by the debug print on the next line.
            caps_num_i = int(activation.get_shape()[1])
            print("_____",caps_num_i)
            # The third return value is kept as `test2`; it is not used again in this cell.
            miu, activation, test2 = em_routing(votes, activation, num_classes, weights_regularizer,r_class_caps)
            
        # Unfold the spatial positions out of the leading dimension again.
        output = tf.reshape(activation, shape=[-1, data_size, data_size, num_classes]) #batch_size
        print("d op",output.get_shape())
    # Average the class activations over the whole data_size x data_size window.
    # This pooled `output` is not consumed by the read-out layer below.
    output = tf.reshape(tf.nn.avg_pool(output, ksize=[1, data_size, data_size, 1], strides=[
                1, 1, 1, 1], padding='VALID'), shape=[-1, num_classes]) # batch_size
    print("miu  2",miu.get_shape())
    # Same spatial averaging for the class poses, with the last two miu axes flattened.
    pose = tf.nn.avg_pool(tf.reshape(miu, shape=[-1, data_size, data_size,miu.get_shape()[2]*miu.get_shape()[3]
                                ]), ksize=[1, data_size, data_size, 1], strides=[1, 1, 1, 1], padding='VALID')
    print("output_size_posssss",pose.get_shape())
    # Shapes recorded from the earlier fixed-batch run (batch_size = 50):
    #miu  2 (450, 1, 10, 18)
    #output_size_posssss (50, 1, 1, 180)
    # 18 is hard-coded: 16 pose values plus the 2 coordinate values concatenated above.
    pose_out = tf.reshape(pose, shape=[-1, num_classes, 18])
    print("output_size",pose_out.get_shape())
    # Flatten the class poses into one feature vector per sample.
    vector_j = tf.reshape(pose_out, shape= [-1, num_classes * 18])
    print("pose_out",pose_out.get_shape())
    print("vector_j",vector_j.get_shape())
with tf.variable_scope('output_layer') as scope:
    # Linear read-out (no activation) with ACTIONS outputs -- presumably one Q-value per
    # action; confirm against the training loop.
    logits = tf.contrib.layers.fully_connected(vector_j, num_outputs=ACTIONS, activation_fn=None)
print("output_size",logits.get_shape())

(?, 12, 12, 32)
12
(?, 12, 12, 8, 16)
(?, 12, 12, 8, 1)
(?, 12, 12, 136)
(?, 5, 5, 136, 9) fdsggs
5
1 (?, 72, 17)
output shape --------------- (?, 72, 17)
activation shape---------------------- (?, 72, 1)
w (1, 72, 16, 4, 4)
w (?, 72, 16, 4, 4)
(?, 72, 16, 16) votes shape
Tensor("conv_caps1/v/tile___3/Reshape:0", shape=(?, 72, 16, 16), dtype=float32)  = votes
(?, 72, 16) r shape__________
(?, 72, 16, 1) r1
(?, 72, 16) ap
(?, 72, 16, 1) r1
(?, 16, 1) r_sum
(?, 16, 16) cost_h
activation (?, 16)
3 (?, 5, 5, 16, 16)
activation (?, 5, 5, 16, 1)
272
5 (?, 5, 5, 272)
(?, 3, 3, 272, 9) fdsggs
canv_caps2 (?, 144, 17) 3
canv_caps2_activation (?, 144, 1) 3
w (1, 144, 16, 4, 4)
w (?, 144, 16, 4, 4)
(?, 144, 16, 16) votes shape
144 for 1
Tensor("conv_caps2/v/tile___3/Reshape:0", shape=(?, 144, 16, 16), dtype=float32)  = votes
(?, 144, 16) r shape__________
(?, 144, 16, 1) r1
(?, 144, 16) ap
(?, 144, 16, 1) r1
(?, 16, 1) r_sum
(?, 16, 16) cost_h
4 (?, 16, 16)
4 ---activation (?, 16, 1)
w (1, 16, 10,

In [62]:
(50, 12, 12, 32)
final shape pose (50, 12, 12, 128)
final shape activation (50, 12, 12, 8)
(50, 12, 12, 8, 16)
(50, 12, 12, 8, 1)
(50, 12, 12, 136)

SyntaxError: invalid syntax (<ipython-input-62-3a57a218cc8c>, line 2)