# Matrix Capsule Network for RL

In [1]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import cv2
import sys
import pong_fun as game # whichever is imported "as game" will be used
import random
import time 
import numpy as np
from collections import deque

  from ._conv import register_converters as _register_converters


pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html


# Parameters for the MatrixNet

In [2]:
# --- EM routing hyper-parameters -------------------------------------------
iter_routing = 2  # number of EM routing iterations run per capsule layer
# Lambda (inverse temperature) of the activation function a_c at iteration 0.
# This must be a plain float: a trailing comma after the literal would silently
# turn it into the 1-tuple (0.01,).
ac_lambda0 = 0.01
# Increment of lambda per routing iteration. The paper describes lambda as
# increasing on a fixed schedule but does not publish the exact values.
ac_lambda_step = 0.01
epsilon = 1e-9  # small constant added to denominators / variances in routing

# --- Capsule network layer sizes ---------------------------------------------
A = 32  # number of channels in output from ReLU Conv1
B = 8   # number of capsules in output from PrimaryCaps
C = 16  # number of channels in output from ConvCaps1
D = 16  # number of channels in output from ConvCaps2
is_train = True   # passed as `trainable` to the slim conv layers
num_classes = 10  # number of class capsules
train_freq = 10   # a gradient step is taken every `train_freq` timesteps

# Parameters for the Agent

In [3]:
# Deep Q-learning settings for the agent.
ACTIONS = 6        # length of the one-hot action vector
GAMMA = 0.99       # discount applied to the bootstrapped next-state value

# Schedule, measured in environment timesteps.
OBSERVE = 500.0    # act randomly (no training) for this many timesteps
EXPLORE = 5000.0   # number of timesteps over which epsilon is annealed

# Epsilon-greedy exploration bounds.
INITIAL_EPSILON, FINAL_EPSILON = 1.0, 0.05

# Experience replay.
REPLAY_MEMORY = 50000      # maximum number of stored transitions
batch_size = BATCH = 32    # transitions sampled per gradient step

# Functions for the MatrixNet

In [4]:
def kernel_tile(input, kernel, stride):
    """Collect every ``kernel x kernel`` spatial window of a feature map.

    A fixed one-hot depthwise filter copies each position of the window into
    its own output channel, so no values are mixed; the result is then
    rearranged so that the window positions get an axis of their own.

    Args:
        input: 4-D tensor whose last (channel) axis has a static size.
        kernel: side length of the square window.
        stride: step of the window along both spatial axes.

    Returns:
        Tensor of shape ``[batch, out_h, out_w, kernel * kernel, channels]``
        where ``out_h`` / ``out_w`` follow from 'VALID' padding.
    """
    in_shape = input.get_shape()
    channels = int(in_shape[3])
    window = kernel * kernel

    # one_hot[i, j, :, m] == 1 exactly when m == i * kernel + j.
    one_hot = np.eye(window, dtype=np.float32).reshape(kernel, kernel, 1, window)
    one_hot = np.repeat(one_hot, channels, axis=2)

    patches = tf.nn.depthwise_conv2d(
        input,
        tf.constant(one_hot, dtype=tf.float32),
        strides=[1, stride, stride, 1],
        padding='VALID')

    out_shape = patches.get_shape()
    out_h, out_w = int(out_shape[1]), int(out_shape[2])
    # Depthwise output channels are laid out as (input channel, window slot).
    patches = tf.reshape(patches, shape=[-1, out_h, out_w, channels, window])

    # Put the window slots before the channels.
    return tf.transpose(patches, perm=[0, 1, 2, 4, 3])

# `input` is expected to be a tensor of shape [batch_size, caps_num_i, 16].
def mat_transform(input, caps_num_c, regularizer, bs):
    """Compute the votes of every input capsule for every output capsule.

    Each 16-value pose is viewed as a 4x4 matrix and multiplied by a learned
    4x4 matrix per (input capsule, output capsule) pair.

    Args:
        input: pose tensor of shape ``[batch, caps_num_i, 16]``.
        caps_num_c: number of output capsules.
        regularizer: regularizer attached to the weight variable ``w``.
        bs: size of the leading axis of ``input`` (Python int or scalar int
            tensor); the weights are tiled this many times before the matmul.

    Returns:
        Votes tensor of shape ``[batch, caps_num_i, caps_num_c, 16]``.
    """
    caps_num_i = int(input.get_shape()[1])

    # One 4x4 pose matrix per input capsule, with a singleton axis that is
    # later expanded over the output capsules.
    poses = tf.reshape(input, shape=[-1, caps_num_i, 1, 4, 4])

    # The capsule output is the mean of a Gaussian plus an activation, neither
    # of which depends on the absolute scale of w and the votes; a larger
    # stddev helps numerical stability.
    weights = slim.variable(
        'w',
        shape=[1, caps_num_i, caps_num_c, 4, 4],
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(mean=0.0, stddev=1.0),
        regularizer=regularizer)

    with tf.variable_scope('tile___1'):
        weights = tf.tile(weights, [bs, 1, 1, 1, 1])
    with tf.variable_scope('tile___2'):
        poses = tf.tile(poses, [1, 1, caps_num_c, 1, 1])
    with tf.variable_scope('tile___3'):
        vote_matrices = tf.matmul(poses, weights)
        votes = tf.reshape(vote_matrices, [-1, caps_num_i, caps_num_c, 16])

    return votes


In [5]:
def em_routing(votes, activation, caps_num_c, regularizer, r_in):
    """Run EM routing between an input and an output capsule layer.

    The number of iterations is taken from the module-level ``iter_routing``.
    The final activation logits are scaled by ``ac_lambda0``; no per-iteration
    lambda schedule is applied in this function.

    Args:
        votes: tensor of shape ``[N, caps_num_i, caps_num_c, n_channels]``.
        activation: input capsule activations, shape ``[N, caps_num_i, 1]``.
        caps_num_c: number of output capsules.
        regularizer: regularizer attached to ``beta_v`` and ``beta_a``.
        r_in: initial assignment probabilities used for the first iteration,
            shape ``[N, caps_num_i, caps_num_c]``.

    Returns:
        Tuple ``(miu, activation_out, test)``:
          * ``miu``: output capsule means, ``[N, 1, caps_num_c, n_channels]``.
          * ``activation_out``: output activations, ``[N, caps_num_c]``.
          * ``test``: always an empty list; kept so callers can keep unpacking
            three values.
    """
    test = []

    caps_num_i = int(activation.get_shape()[1])
    n_channels = int(votes.get_shape()[-1])

    # Assigned inside the loop; they keep these defaults only if iter_routing
    # is 0.
    sigma_square = []
    miu = []
    activation_out = []

    beta_v = slim.variable('beta_v', shape=[caps_num_c, n_channels], dtype=tf.float32,
                           initializer=tf.constant_initializer(0.0),
                           regularizer=regularizer)
    beta_a = slim.variable('beta_a', shape=[caps_num_c], dtype=tf.float32,
                           initializer=tf.constant_initializer(0.0),
                           regularizer=regularizer)

    # Gradients are allowed to flow through both inputs (no stop_gradient).
    votes_in = votes
    activation_in = activation

    for iters in range(iter_routing):
        # ---- E-step: recompute the assignment probabilities r ------------
        if iters == 0:
            # First pass: use the caller-supplied prior.
            r = r_in
        else:
            # Contributor: Yunzhi Shi
            # Working in log space gives higher numerical stability,
            # especially for a larger number of iterations.
            log_p_c_h = -tf.log(tf.sqrt(sigma_square)) - \
                        (tf.square(votes_in - miu) / (2 * sigma_square))
            # Shift by the per-sample maximum before exponentiating.
            # `keepdims` replaces the deprecated `keep_dims` spelling and
            # matches the other reductions in this function.
            log_p_c_h = log_p_c_h - \
                        (tf.reduce_max(log_p_c_h, axis=[2, 3], keepdims=True) - tf.log(10.0))
            p_c = tf.exp(tf.reduce_sum(log_p_c_h, axis=3))

            ap = p_c * tf.reshape(activation_out, shape=[-1, 1, caps_num_c])

            r = ap / (tf.reduce_sum(ap, axis=2, keepdims=True) + epsilon)

        # ---- M-step: update the Gaussians of the output capsules ---------
        # Weight assignments by the input activations, renormalise over the
        # output capsules.
        r = r * activation_in
        r = r / (tf.reduce_sum(r, axis=2, keepdims=True) + epsilon)

        # Total assignment mass received by each output capsule.
        r_sum = tf.reduce_sum(r, axis=1, keepdims=True)
        r1 = tf.reshape(r / (r_sum + epsilon),
                        shape=[-1, caps_num_i, caps_num_c, 1])

        miu = tf.reduce_sum(votes_in * r1, axis=1, keepdims=True)
        sigma_square = tf.reduce_sum(tf.square(votes_in - miu) * r1,
                                     axis=1, keepdims=True) + epsilon

        if iters == iter_routing - 1:
            # Last iteration: activations from the cost of each capsule.
            r_sum = tf.reshape(r_sum, [-1, caps_num_c, 1])
            cost_h = (beta_v + tf.log(tf.sqrt(tf.reshape(sigma_square,
                                                         shape=[-1, caps_num_c, n_channels])))) * r_sum
            activation_out = tf.nn.softmax(ac_lambda0 * (beta_a - tf.reduce_sum(cost_h, axis=2)))
        else:
            # Intermediate iterations: activations from the assignment mass.
            activation_out = tf.nn.softmax(r_sum)

    return miu, activation_out, test

In [6]:
def get_coord_add(dataset_name: str):
    """Return the scaled grid of (x, y) coordinates for a dataset.

    Args:
        dataset_name: ``'mnist'`` (3x3 grid) or ``'smallNORB'`` (4x4 grid).

    Returns:
        float32 array of shape ``[rows, cols, 2]``; entry ``[row, col]`` is
        ``[x, y]`` divided by the dataset's scale (28 or 32 — presumably the
        image side length).

    Raises:
        KeyError: if ``dataset_name`` is not one of the supported names.
    """
    # TODO: get coord add for cifar10/100 datasets (32x32x3)
    # dataset name -> (coordinate values along one axis, scale)
    layouts = {
        'mnist': ((8., 12., 16.), 28.),
        'smallNORB': ((8., 12., 16., 24.), 32.),
    }
    axis_values, scale = layouts[dataset_name]

    # x varies along columns, y along rows.
    grid = [[[x, y] for x in axis_values] for y in axis_values]

    return np.array(grid, dtype=np.float32) / scale

# Agent for the MatCap DQN

In [7]:
def createNetwork():
    """Build the matrix-capsule Q-network in the default TensorFlow graph.

    Pipeline: ReLU conv -> primary capsules -> conv capsules 1 -> conv
    capsules 2 -> class capsules (EM routing after each capsule layer) ->
    spatial average pooling of the class poses -> fully connected readout
    with ``ACTIONS`` outputs.

    For the 80x80 input the spatial size goes 12 -> 5 -> 3.

    Returns:
        Tuple ``(s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps,
        coord_add_op_class_caps)``:
          * ``s``: input placeholder, ``[None, 80, 80, 4]``.
          * ``readout``: output tensor, ``[None, ACTIONS]``.
          * ``bs``: scalar int32 placeholder holding the batch size; needed
            because the vote weights are tiled over the batch.
          * ``r_conv_caps1`` / ``r_conv_caps2`` / ``r_class_caps``: initial
            routing assignment placeholders for the three capsule layers.
          * ``coord_add_op_class_caps``: placeholder for the two coordinate
            channels concatenated onto the class-capsule votes.
    """
    # ---------------------------- placeholders ----------------------------
    s = tf.placeholder(tf.float32, shape=(None, 80, 80, 4), name='X')
    bs = tf.placeholder(tf.int32, shape=(), name='bs')

    # Leading axis is (spatial positions * batch size): 5*5*bs for conv_caps1,
    # 3*3*bs for conv_caps2 and class_caps.
    r_conv_caps1 = tf.placeholder(tf.float32, [None, 3 * 3 * B, C], name='r_conv_caps1')
    r_conv_caps2 = tf.placeholder(tf.float32, [None, 3 * 3 * C, D], name='r_conv_caps2')
    r_class_caps = tf.placeholder(tf.float32, [None, D, num_classes], name='r_class_caps')
    coord_add_op_class_caps = tf.placeholder(
        tf.float32, [None, D, num_classes, 2], name='coord_add_op_class_caps')
    # -----------------------------------------------------------------------

    data_size = int(s.get_shape()[1])

    # Biases are drawn from a truncated normal instead of being initialised
    # to constant 0, for higher stability. Conv weights use slim's default
    # initializer.
    bias_initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
    # The paper did not mention any regularization; a common L2 regularizer
    # on the weights is added here.
    weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    with slim.arg_scope([slim.conv2d], trainable=is_train,
                        biases_initializer=bias_initializer,
                        weights_regularizer=weights_regularizer):
        with tf.variable_scope('relu_conv1'):
            output = slim.conv2d(s, num_outputs=A, kernel_size=[10, 10], stride=6,
                                 padding='VALID', scope='relu_conv1',
                                 activation_fn=tf.nn.relu)
            # VALID convolution: out = (in - kernel) // stride + 1
            data_size = (data_size - 10) // 6 + 1

        with tf.variable_scope('primary_caps'):
            pose = slim.conv2d(output, num_outputs=B * 16, kernel_size=[1, 1], stride=1,
                               padding='VALID', scope='primary_caps', activation_fn=None)
            activation = slim.conv2d(output, num_outputs=B, kernel_size=[1, 1], stride=1,
                                     padding='VALID', scope='primary_caps/activation',
                                     activation_fn=tf.nn.sigmoid)
            pose = tf.reshape(pose, shape=[-1, data_size, data_size, B, 16])  # (batch, 12, 12, 8, 16)
            activation = tf.reshape(activation, shape=[-1, data_size, data_size, B, 1])  # (batch, 12, 12, 8, 1)
            # Each capsule carries 16 pose values followed by 1 activation.
            output = tf.concat([pose, activation], axis=4)
            output = tf.reshape(output, shape=[-1, data_size, data_size, B * 17])  # (batch, 12, 12, 136)

        with tf.variable_scope('conv_caps1'):
            output = kernel_tile(output, 3, 2)
            # 3x3 window, stride 2, VALID. The former floor((n - 2) / 2) only
            # agreed with this for even n.
            data_size = (data_size - 3) // 2 + 1
            # Rows are (batch * data_size * data_size); 3*3*B capsules each.
            output = tf.reshape(output, shape=[-1, 3 * 3 * B, 17])
            activation = tf.reshape(output[:, :, 16], shape=[-1, 3 * 3 * B, 1])

            with tf.variable_scope('v'):
                votes = mat_transform(output[:, :, :16], C, weights_regularizer,
                                      bs=bs * data_size * data_size)
            with tf.variable_scope('routing'):
                miu, activation, _ = em_routing(votes, activation, C,
                                                weights_regularizer, r_conv_caps1)

            pose = tf.reshape(miu, shape=[-1, data_size, data_size, C, 16])  # (batch, 5, 5, 16, 16)
            activation = tf.reshape(activation, shape=[-1, data_size, data_size, C, 1])
            output = tf.reshape(tf.concat([pose, activation], axis=4),
                                [-1, data_size, data_size, C * 17])  # (batch, 5, 5, 272)

        with tf.variable_scope('conv_caps2'):
            output = kernel_tile(output, 3, 1)
            # 3x3 window, stride 1, VALID.
            data_size = (data_size - 3) // 1 + 1

            output = tf.reshape(output, shape=[-1, 3 * 3 * C, 17])
            activation = tf.reshape(output[:, :, 16], shape=[-1, 3 * 3 * C, 1])

            with tf.variable_scope('v'):
                votes = mat_transform(output[:, :, :16], D, weights_regularizer,
                                      bs=bs * data_size * data_size)
            with tf.variable_scope('routing'):
                miu, activation, _ = em_routing(votes, activation, D,
                                                weights_regularizer, r_conv_caps2)

            pose = tf.reshape(miu, shape=[-1, D, 16])
            activation = tf.reshape(activation, shape=[-1, D, 1])

        with tf.variable_scope('class_caps'):
            with tf.variable_scope('v'):
                votes = mat_transform(pose, num_classes, weights_regularizer,
                                      bs=bs * data_size * data_size)
                # The coordinate addition is fed through a placeholder so the
                # graph does not depend on a fixed batch size; the two
                # coordinate channels are prepended to the 16 vote channels.
                votes = tf.concat([coord_add_op_class_caps, votes], axis=3)
            with tf.variable_scope('routing'):
                miu, activation, _ = em_routing(votes, activation, num_classes,
                                                weights_regularizer, r_class_caps)

            output = tf.reshape(activation, shape=[-1, data_size, data_size, num_classes])

        # Spatially averaged class activations. Built for inspection only:
        # the readout below consumes the pooled poses, not this tensor.
        output = tf.reshape(
            tf.nn.avg_pool(output, ksize=[1, data_size, data_size, 1],
                           strides=[1, 1, 1, 1], padding='VALID'),
            shape=[-1, num_classes])

        # miu is (batch * 9, 1, num_classes, 18); pool it over the 3x3 grid.
        pose = tf.nn.avg_pool(
            tf.reshape(miu, shape=[-1, data_size, data_size,
                                   miu.get_shape()[2] * miu.get_shape()[3]]),
            ksize=[1, data_size, data_size, 1], strides=[1, 1, 1, 1], padding='VALID')
        pose_out = tf.reshape(pose, shape=[-1, num_classes, 18])
        vector_j = tf.reshape(pose_out, shape=[-1, num_classes * 18])

    with tf.variable_scope('output_layer'):
        # Linear layer producing one value per action.
        readout = tf.contrib.layers.fully_connected(vector_j, num_outputs=ACTIONS,
                                                    activation_fn=None)
    print("Network has been created successfully")
    return s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps

# Environment for Training of the Agent

In [8]:
def trainNetwork(sess, s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps):
    """Train the capsule Q-network online with epsilon-greedy Q-learning.

    The agent plays the imported ``game``, stores transitions in a bounded
    replay memory and, once ``OBSERVE`` timesteps have passed, performs one
    Adam step on a sampled minibatch every ``train_freq`` timesteps.

    Args:
        sess: TensorFlow session used for every graph evaluation.
        s: state placeholder, fed with arrays of shape ``[n, 80, 80, 4]``.
        readout: network output tensor with ``ACTIONS`` values per state.
        bs: scalar placeholder that must be fed the number of states ``n``.
        r_conv_caps1, r_conv_caps2, r_class_caps: placeholders for the
            initial (uniform) routing assignments of the capsule layers.
        coord_add_op_class_caps: placeholder for the class-capsule
            coordinate addition.

    Returns:
        None. The loop ends when ``bar1_score - bar2_score`` exceeds 18.
    """
    tick = time.time()

    # Loss: squared error between the target y and the predicted value of
    # the action that was actually taken (selected by the one-hot mask a).
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), axis=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cost)

    def preprocess(frame):
        """Resize a frame to 80x80, convert to grayscale and binarise it."""
        gray = cv2.cvtColor(cv2.resize(frame, (80, 80)), cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
        return binary

    def routing_feed(n_states):
        """Build the constant (non-state) part of the feed dict for n_states."""
        # conv_caps1 works on a 5x5 grid, conv_caps2 / class_caps on a 3x3
        # grid (for the 80x80 input); the leading axis is grid cells * batch.
        coord_add = np.reshape(get_coord_add('mnist'), newshape=[3 * 3, 1, 1, 2])
        coord_add = np.tile(coord_add, [n_states, D, num_classes, 1])
        return {
            # Uniform initial assignment over the output capsules.
            r_conv_caps1: np.ones([5 * 5 * n_states, 3 * 3 * B, C]) / C,
            r_conv_caps2: np.ones([3 * 3 * n_states, 3 * 3 * C, D]) / D,
            r_class_caps: np.ones([3 * 3 * n_states, D, num_classes]) / num_classes,
            coord_add_op_class_caps: coord_add,
            bs: np.int32(n_states),
        }

    def predict(states, base_feed):
        """Evaluate the readout for `states` using the matching routing feed."""
        feed = dict(base_feed)
        feed[s] = states
        return sess.run(readout, feed_dict=feed)

    # Open up a game state to communicate with the emulator.
    game_state = game.GameState()

    # Bounded replay memory: the oldest transition is dropped automatically.
    replay_memory = deque(maxlen=REPLAY_MEMORY)

    # Get the first state by doing nothing; stack the frame four times to
    # form the initial 80x80x4 state.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    first_frame = game_state.frame_step(do_nothing)[0]
    x_t = preprocess(first_frame)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # Must run after the optimizer is built so its slot variables exist.
    sess.run(tf.global_variables_initializer())

    # Routing feeds are constant, so build them once per batch size.
    act_feed = routing_feed(1)
    train_feed = routing_feed(BATCH)

    # Exploration rate; deliberately not called `epsilon` to avoid shadowing
    # the module-level numerical epsilon.
    explore_rate = INITIAL_EPSILON
    t = 0
    episode = 0
    while True:
        # Choose an action epsilon-greedily.
        readout_t = predict(s_t.reshape((1, 80, 80, 4)), act_feed)

        a_t = np.zeros([ACTIONS])
        if random.random() <= explore_rate or t <= OBSERVE:
            action_index = random.randrange(ACTIONS)
        else:
            action_index = np.argmax(readout_t)
        a_t[action_index] = 1

        # Scale down the exploration rate.
        if explore_rate > FINAL_EPSILON and t > OBSERVE:
            explore_rate -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # Run the selected action and observe next state and reward.
        x_t1_col, r_t, terminal, bar1_score, bar2_score = game_state.frame_step(a_t)
        if terminal == 1:
            episode += 1
        x_t1 = np.reshape(preprocess(x_t1_col), (80, 80, 1))
        # Newest frame first, followed by the three most recent older frames.
        s_t1 = np.append(x_t1, s_t[:, :, 0:3], axis=2)

        # Store the transition in the replay memory.
        replay_memory.append((s_t, a_t, r_t, s_t1, terminal))

        # Only train once observation is over, every train_freq timesteps.
        if t > OBSERVE and t % train_freq == 0:
            minibatch = random.sample(replay_memory, BATCH)

            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            readout_j1_batch = predict(s_j1_batch, train_feed)

            # Terminal transitions are worth their reward only; otherwise
            # bootstrap from the best next-state value.
            y_batch = [
                reward if is_terminal else reward + GAMMA * np.max(q_next)
                for (_, _, reward, _, is_terminal), q_next in zip(minibatch, readout_j1_batch)
            ]

            # Perform the gradient step.
            step_feed = dict(train_feed)
            step_feed.update({y: y_batch, a: a_batch, s: s_j_batch})
            sess.run(train_step, feed_dict=step_feed)

        # Update the old values.
        s_t = s_t1
        t += 1

        if r_t != 0:
            print ("TIMESTEP", t, "/ e", episode, "/ bar1_score", bar1_score, "/ bar2_score", bar2_score, "/ REWARD", r_t, "/ Q_MAX %e" % np.max(readout_t))

        if (bar1_score - bar2_score) > 18:
            print("Game_Ends_in Time:",int(time.time() - tick))
            break

In [9]:
def playGame():
    """Build a fresh graph and session, then train the agent until the game ends.

    The session is an ``InteractiveSession`` so that it is installed as the
    default session for graph evaluations, and it is always closed on exit
    so that its resources are released.
    """
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    try:
        s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps = createNetwork()
        trainNetwork(sess, s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps)
    finally:
        # Release the session even if building or training raised.
        sess.close()

In [None]:
def main():
    """Entry point: run one complete training session."""
    playGame()


if __name__ == '__main__':
    # Time the whole run, including graph construction.
    tick = time.time()
    main()
    print("Game_Ends_in Time:", int(time.time() - tick))
    print("____________ END HERE _____________")

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Network has been created successfully
TIMESTEP 129 / e 0 / bar1_score 0 / bar2_score 1 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 175 / e 0 / bar1_score 0 / bar2_score 2 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 221 / e 0 / bar1_score 0 / bar2_score 3 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 267 / e 0 / bar1_score 0 / bar2_score 4 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 313 / e 0 / bar1_score 0 / bar2_score 5 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 359 / e 0 / bar1_score 0 / bar2_score 6 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 405 / e 0 / bar1_score 0 / bar2_score 7 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 451 / e 0 / bar1_score 0 / bar2_score 8 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 497 / e 0 / bar1_score 0 / bar2_score 9 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 543 / e 0 / bar1_score 0 / bar2_score 10 / REWARD -1 / Q_MAX 0.000000e+00
TIMESTEP 589 / e 0 / bar1_score 0 / bar2_score 11 / REWARD -1 / Q_MA

# Backup code

In [12]:
# Scratch cell: rebuild the network on a clean default graph, without starting
# a session or training. NOTE(review): the captured output below lists layer
# shapes, so this was presumably run with the debug prints enabled — confirm.
tf.reset_default_graph()
s, readout, bs, r_conv_caps1, r_conv_caps2, r_class_caps, coord_add_op_class_caps = createNetwork()

(?, 12, 12, 32)
12
(?, 12, 12, 8, 16)
(?, 12, 12, 8, 1)
(?, 12, 12, 136)
(?, 5, 5, 136, 9) fdsggs
5
1 (?, 72, 17)
output shape --------------- (?, 72, 17)
activation shape---------------------- (?, 72, 1)
w (1, 72, 16, 4, 4)
w (?, 72, 16, 4, 4)
(?, 72, 16, 16) votes shape
Tensor("conv_caps1/v/tile___3/Reshape:0", shape=(?, 72, 16, 16), dtype=float32)  = votes
(?, 72, 16) r shape__________
(?, 72, 16, 1) r1
(?, 72, 16) ap
(?, 72, 16, 1) r1
(?, 16, 1) r_sum
(?, 16, 16) cost_h
activation (?, 16)
3 (?, 5, 5, 16, 16)
activation (?, 5, 5, 16, 1)
272
5 (?, 5, 5, 272)
(?, 3, 3, 272, 9) fdsggs
canv_caps2 (?, 144, 17) 3
canv_caps2_activation (?, 144, 1) 3
w (1, 144, 16, 4, 4)
w (?, 144, 16, 4, 4)
(?, 144, 16, 16) votes shape
144 for 1
Tensor("conv_caps2/v/tile___3/Reshape:0", shape=(?, 144, 16, 16), dtype=float32)  = votes
(?, 144, 16) r shape__________
(?, 144, 16, 1) r1
(?, 144, 16) ap
(?, 144, 16, 1) r1
(?, 16, 1) r_sum
(?, 16, 16) cost_h
4 (?, 16, 16)
4 ---activation (?, 16, 1)
w (1, 16, 10,

In [63]:
# Scratch cell: builds the capsule graph inline (not via createNetwork) on a
# clean default graph. This first part declares the placeholders and the
# initializer / regularizer objects used by the layers that follow.
tf.reset_default_graph()
#batch_size = 50
# Input placeholder with a single channel per 80x80 frame.
# NOTE(review): createNetwork feeds 4 stacked frames; confirm 1 is intended.
X = tf.placeholder(tf.float32, shape=(None, 80, 80, 1), name='X')
#X = tf.placeholder(tf.float32, shape=(batch_size, 80, 80, 4), name='X')
Y = tf.placeholder(tf.float32, [None, 10], name='Y')
#Y = tf.placeholder(tf.float32, [batch_size, 10], name='Y')
# -------------------------------------------------------------------------------------
# Scalar batch size, fed at run time.
bs = tf.placeholder(tf.int32, shape=(), name='bs')
#bs = batch_size
# Initial routing assignments; the leading (None) axis is expected to be
# spatial positions * batch size, as noted per line.
r_conv_caps1= tf.placeholder(tf.float32,[None, 72, C], name='r_conv_caps1') # leading axis: 5*5*batch_size
#r_conv_caps1= tf.placeholder(tf.float32,[5*5*batch_size, 72, C], name='r_conv_caps1') # 5*5*batch_size

r_conv_caps2 = tf.placeholder(tf.float32,[None, 144, D], name='r_conv_caps2') # leading axis: 3*3*batch_size
#r_conv_caps2 = tf.placeholder(tf.float32,[3*3*batch_size, 144, D], name='r_conv_caps2') # 3*3*batch_size

r_class_caps = tf.placeholder(tf.float32,[None, 16, num_classes], name='r_class_caps') # leading axis: 3*3*batch_size
#r_class_caps = tf.placeholder(tf.float32,[3*3*batch_size, 16, num_classes], name='r_class_caps') # 3*3*batch_size

#coord_add_op_class_caps  = tf.placeholder(tf.float32,[3*3*batch_size, 16, num_classes, 2], name='coord_add_op_class_caps')
                                                                 # 3*3*batch_size
# Coordinate addition for the class capsules (2 coordinate channels).
coord_add_op_class_caps  = tf.placeholder(tf.float32,[None, 16, num_classes, 2], name='coord_add_op_class_caps')
# leading axis: 3*3*batch_size

# -------------------------------------------------------------------------------------
# Spatial side length of the input, read from the placeholder's static shape.
data_size = int(X.get_shape()[1])
# NOTE: this is a truncated-normal initializer, not Xavier; within the visible
# part of this cell it is not passed to any layer.
initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
# Instead of initializing the biases with constant 0,
# a truncated normal initializer is used here for higher stability.
bias_initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)  # tf.constant_initializer(0.0)
# The paper did not mention any regularization; a common L2 regularizer on the weights is added here.
weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)
# weights_initializer=initializer,
with slim.arg_scope([slim.conv2d], trainable=is_train, biases_initializer=bias_initializer, weights_regularizer=weights_regularizer):
    with tf.variable_scope('relu_conv1'):
        output = slim.conv2d(X, num_outputs=A, kernel_size=[10, 10], stride=6, padding='VALID', scope='relu_conv1', activation_fn=tf.nn.relu)
        data_size = int(np.floor((data_size - 10) / 6)) + 1
        print(output.get_shape())
        print(data_size)
        #assert output.get_shape() == [batch_size, data_size, data_size, 32]
        votes__1 = output
    with tf.variable_scope('primary_caps'):
        pose = slim.conv2d(output, num_outputs=B * 16,kernel_size=[1, 1], stride=1, padding='VALID', scope='primary_caps', activation_fn=None)
        activation = slim.conv2d(output, num_outputs=B, kernel_size=[
                                 1, 1], stride=1, padding='VALID', scope='primary_caps/activation', activation_fn=tf.nn.sigmoid)
        pose = tf.reshape(pose, shape=[-1, data_size, data_size, B, 16]) # (50, 12, 12, 8, 16)
        
        print(pose.get_shape())
        activation = tf.reshape(activation, shape=[-1, data_size, data_size, B, 1]) # (50, 12, 12, 8, 1)
        print(activation.get_shape())
        output = tf.concat([pose, activation], axis=4)
        output = tf.reshape(output, shape=[-1, data_size, data_size, B * 17]) # (50, 12, 12, 136)
        print(output.get_shape())
        
        #assert output.get_shape() == [batch_size, data_size, data_size, B * 17]
    with tf.variable_scope('conv_caps1') as scope:
        output = kernel_tile(output, 3, 2)
        data_size = int(np.floor((data_size - 2) / 2))
        print(data_size) # 5 
        output = tf.reshape(output, shape=[-1, 3 * 3 * B, 17]) 
        # batch_size * data_size * data_size  (1250, 72, 17) 
        print("1",output.get_shape())
        activation = tf.reshape(output[:, :, 16], shape=[-1, 3 * 3 * B, 1])
        print("output shape ---------------",output.get_shape())
        print("activation shape----------------------",activation.get_shape()) #  (1250, 72, 1)
        
        with tf.variable_scope('v') as scope:
            votes = mat_transform(output[:, :, :16], C, weights_regularizer, bs = bs*data_size*data_size)
            #bs*data_size*data_size)
            
            print(votes.get_shape(),"votes shape")
        with tf.variable_scope('routing') as scope:
            caps_num_i = int(activation.get_shape()[1])
            
            miu, activation, _ = em_routing(votes, activation, C, weights_regularizer,r_conv_caps1)
            # miu, activation, _ = em_routing(votes, activation, C, weights_regularizer)
            print("activation",activation.get_shape())
        pose = tf.reshape(miu, shape=[-1, data_size, data_size, C, 16])
        print("3",pose.get_shape()) # 50, 5, 5, 16, 16)
        activation = tf.reshape(activation, shape=[-1, data_size, data_size, C, 1])
        print("activation",activation.get_shape())
        cat_size =  activation.get_shape()[3]*activation.get_shape()[4] + pose.get_shape()[3] *pose.get_shape()[4]
        print(cat_size)
        output = tf.reshape(tf.concat([pose, activation], axis=4),[-1, data_size, data_size, cat_size])
        print("5",output.get_shape()) # (50, 5, 5, 272)
        
        
    with tf.variable_scope('conv_caps2') as scope:
        output = kernel_tile(output, 3, 1)
        
        data_size = int(np.floor((data_size - 2) / 1))
        
        output = tf.reshape(output, shape=[-1, 3 * 3 * C, 17]) # batch_size * data_size * data_size
        print("canv_caps2",output.get_shape(), data_size)
        activation = tf.reshape(output[:, :, 16], shape=[-1 , 3 * 3 * C, 1]) # batch_size * data_size * data_size
        print("canv_caps2_activation",activation.get_shape(), data_size)
        
        with tf.variable_scope('v') as scope:
            votes = mat_transform(output[:, :, :16], D, weights_regularizer,bs = bs*data_size*data_size)
            print(votes.get_shape(),"votes shape")
            
        with tf.variable_scope('routing') as scope:
            caps_num_i = int(activation.get_shape()[1])
            print(caps_num_i,"for 1")
            miu, activation, _ = em_routing(votes, activation, D, weights_regularizer, r_conv_caps2)

        pose = tf.reshape(miu, shape=[-1, D, 16]) # batch_size * data_size * data_size
        print("4",pose.get_shape())
        #tf.logging.info('conv cap 2 pose shape: {}'.format(votes.get_shape()))
        activation = tf.reshape(activation, shape=[-1, D, 1]) # batch_size * data_size * data_size
        print("4 ---activation",activation.get_shape())
        
    with tf.variable_scope('class_caps') as scope:
        with tf.variable_scope('v') as scope:
            votes = mat_transform(pose, num_classes, weights_regularizer,bs = bs*data_size*data_size)
            print(votes.get_shape(),"votes.getshape")
            assert votes.get_shape()[1:] == [D, num_classes, 16]
            #tf.logging.info('class cap votes original shape: {}'.format(votes.get_shape()))
            '''coord_add = get_coord_add('mnist') 
            coord_add = np.reshape(coord_add, newshape=[data_size * data_size, 1, 1, 2])
            coord_add = np.tile(coord_add, [bs, D, num_classes, 1])
            coord_add_op = tf.constant(coord_add, dtype=tf.float32)
            print("___coord_add______",coord_add_op.shape)'''
            
            votes = tf.concat([coord_add_op_class_caps, votes], axis=3)
            #tf.logging.info('class cap votes coord add shape: {}'.format(votes.get_shape()))
            print(votes.get_shape(),"coorr vote shape after  jnbfv")
        with tf.variable_scope('routing') as scope:
            caps_num_i = int(activation.get_shape()[1])
            print("_____",caps_num_i)
            miu, activation, test2 = em_routing(votes, activation, num_classes, weights_regularizer,r_class_caps)
            
        output = tf.reshape(activation, shape=[-1, data_size, data_size, num_classes]) #batch_size
        print("d op",output.get_shape())
    output = tf.reshape(tf.nn.avg_pool(output, ksize=[1, data_size, data_size, 1], strides=[
                1, 1, 1, 1], padding='VALID'), shape=[-1, num_classes]) # batch_size
    print("miu  2",miu.get_shape())
    pose = tf.nn.avg_pool(tf.reshape(miu, shape=[-1, data_size, data_size,miu.get_shape()[2]*miu.get_shape()[3]
                                ]), ksize=[1, data_size, data_size, 1], strides=[1, 1, 1, 1], padding='VALID')
    print("output_size_posssss",pose.get_shape())
    #miu  2 (450, 1, 10, 18)
    #output_size_posssss (50, 1, 1, 180)
    pose_out = tf.reshape(pose, shape=[-1, num_classes, 18])
    print("output_size",pose_out.get_shape())
    vector_j = tf.reshape(pose_out, shape= [-1, num_classes * 18])
    print("pose_out",pose_out.get_shape())
    print("vector_j",vector_j.get_shape())
with tf.variable_scope('output_layer') as scope:
    logits = tf.contrib.layers.fully_connected(vector_j, num_outputs=ACTIONS, activation_fn=None)
print("output_size",logits.get_shape())

(?, 12, 12, 32)
12
(?, 12, 12, 8, 16)
(?, 12, 12, 8, 1)
(?, 12, 12, 136)
(?, 5, 5, 136, 9) fdsggs
5
1 (?, 72, 17)
output shape --------------- (?, 72, 17)
activation shape---------------------- (?, 72, 1)
w (1, 72, 16, 4, 4)
w (?, 72, 16, 4, 4)
(?, 72, 16, 16) votes shape
Tensor("conv_caps1/v/tile___3/Reshape:0", shape=(?, 72, 16, 16), dtype=float32)  = votes
(?, 72, 16) r shape__________
(?, 72, 16, 1) r1
(?, 72, 16) ap
(?, 72, 16, 1) r1
(?, 16, 1) r_sum
(?, 16, 16) cost_h
activation (?, 16)
3 (?, 5, 5, 16, 16)
activation (?, 5, 5, 16, 1)
272
5 (?, 5, 5, 272)
(?, 3, 3, 272, 9) fdsggs
canv_caps2 (?, 144, 17) 3
canv_caps2_activation (?, 144, 1) 3
w (1, 144, 16, 4, 4)
w (?, 144, 16, 4, 4)
(?, 144, 16, 16) votes shape
144 for 1
Tensor("conv_caps2/v/tile___3/Reshape:0", shape=(?, 144, 16, 16), dtype=float32)  = votes
(?, 144, 16) r shape__________
(?, 144, 16, 1) r1
(?, 144, 16) ap
(?, 144, 16, 1) r1
(?, 16, 1) r_sum
(?, 16, 16) cost_h
4 (?, 16, 16)
4 ---activation (?, 16, 1)
w (1, 16, 10,

In [62]:
(50, 12, 12, 32)
final shape pose (50, 12, 12, 128)
final shape activation (50, 12, 12, 8)
(50, 12, 12, 8, 16)
(50, 12, 12, 8, 1)
(50, 12, 12, 136)

SyntaxError: invalid syntax (<ipython-input-62-3a57a218cc8c>, line 2)