In [1]:
import tensorflow as tf

# Define nodes

In [2]:
# Input layer: one placeholder per view (left images and right images).
# Each row is one sample; the number of rows is left unspecified (None).
input_width = 392  # number of features in each view
m = 1000           # sample count used by the centring / covariance cells
left_input = tf.placeholder(dtype=tf.float32, shape=(None, input_width))
right_input = tf.placeholder(dtype=tf.float32, shape=(None, input_width))

In [3]:
def weight_variable(shape):
    """Return a tf.Variable of the given shape with random initial values.

    The initial values are drawn with tf.truncated_normal using a standard
    deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

## The first layer

In [4]:
# Layer sizes
o = 50          # number of output nodes of each network
c_left = 2038   # hidden-layer width for the left images
c_right = 1608  # hidden-layer width for the right images

# Input -> hidden weights
W_left1 = weight_variable(shape=[input_width, c_left])
W_right1 = weight_variable(shape=[input_width, c_right])

# Input -> hidden biases
b_left1 = bias_variable(shape=[c_left])
b_right1 = bias_variable(shape=[c_right])

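The authors propose a new activation function, defined as
$$ s(x) = g^{-1}(x) $$
where
$$ g(y) = \frac{y^3}{3} + y $$
and $g^{-1}$ denotes the inverse function of $g$ (not the reciprocal $1/g$); that is, $s(x)$ is the unique real $y$ satisfying $g(y) = x$.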

In [5]:
def activation(x, W, b):
    """Affine map followed by the activation s = g^{-1}, with g(y) = y**3/3 + y.

    s is the *inverse function* of g, i.e. s(z) is the unique real y with
    y**3/3 + y == z. It is not the reciprocal 1/g(z), which is what the
    elementwise tf.inv op would compute.

    Closed form (real root of the depressed cubic y**3 + 3*y - 3*z = 0):

        c    = 1.5*|z| + sqrt((1.5*z)**2 + 1)        (c >= 1)
        s(z) = sign(z) * (c**(1/3) - c**(-1/3))

    Working with |z| keeps every base passed to tf.pow positive and avoids
    cancellation for large negative z; the sign is restored afterwards
    because s is an odd function.

    Args:
        x: left operand of the matrix product.
        W: weight matrix, right operand of the matrix product.
        b: bias added to the product.

    Returns:
        Tensor shaped like tf.matmul(x, W) + b with s applied elementwise.
    """
    z = tf.matmul(x, W) + b
    half_q = 1.5 * tf.abs(z)
    c = half_q + tf.sqrt(tf.square(half_q) + 1.0)
    # Float literals so the exponents are not truncated by integer division.
    magnitude = tf.pow(c, 1.0 / 3.0) - tf.pow(c, -1.0 / 3.0)
    return tf.sign(z) * magnitude

In [6]:
# Hidden-layer activations of the first layer, one per view
h_left1 = activation(x=left_input, W=W_left1, b=b_left1)
h_right1 = activation(x=right_input, W=W_right1, b=b_right1)

## The second layer

In [7]:
# Hidden -> output weights
W_left2 = weight_variable(shape=[c_left, o])
W_right2 = weight_variable(shape=[c_right, o])

# Hidden -> output biases
b_left2 = bias_variable(shape=[o])
b_right2 = bias_variable(shape=[o])

# Output nodes of each network, computed from the hidden activations
output_left = activation(x=h_left1, W=W_left2, b=b_left2)
output_right = activation(x=h_right1, W=W_right2, b=b_right2)

# Objective function

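We centre the output matrices
$$ \bar{H}_1 = H_1 - \frac{1}{m}H_1\mathbf{1} $$
where $H_1$ is an $o\times m$ matrix and $\mathbf{1}$ is the $m\times m$ all-ones matrix, so the mean of each row of $H_1$ is subtracted from that row. $\bar{H}_2$ is defined in the same way.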

In [8]:
# Centre every output unit by subtracting its mean over the batch, then
# transpose so that each H*_bar has one row per output unit and one column
# per sample. (Subtracting output/m would only rescale the data by
# (1 - 1/m); it would not remove the mean.)
# tf.reduce_mean arguments are passed positionally -- (input, axis=0,
# keep dims=True) -- because their keyword names differ between TensorFlow
# releases.
H1_bar = tf.transpose(output_left - tf.reduce_mean(output_left, 0, True))
H2_bar = tf.transpose(output_right - tf.reduce_mean(output_right, 0, True))

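We estimate the cross-covariance matrix
$$ \hat{\Sigma}_{12} = \frac{1}{m-1} \bar{H}_1\bar{H}_2' $$
and the regularised covariance matrix
$$ \hat{\Sigma}_{11} = \frac{1}{m-1} \bar{H}_1\bar{H}_1' +r_1I$$
where $r_1 > 0$ is a regularisation constant and $I$ is the $o\times o$ identity matrix; $\hat{\Sigma}_{22}$ is defined analogously from $\bar{H}_2$ and $r_2$.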

In [9]:
# Regularisation constants added to the diagonals of the two covariances
r1 = 2
r2 = 2

# Cross-covariance between the two views
sigma12 = tf.matmul(H1_bar, tf.transpose(H2_bar))/(m-1)

# Regularised covariances: the regulariser is r * I (identity matrix), so it
# is built with tf.diag from a vector of ones. tf.ones([o, o]) would add r to
# *every* entry, not only to the diagonal.
sigma11 = tf.matmul(H1_bar, tf.transpose(H1_bar))/(m-1) + r1*tf.diag(tf.ones([o]))
sigma22 = tf.matmul(H2_bar, tf.transpose(H2_bar))/(m-1) + r2*tf.diag(tf.ones([o]))

The total correlation of $H_1$ and $H_2$ is the trace norm of the matrix
$$ T = \hat{\Sigma}_{11}^{-1/2}\hat{\Sigma}_{12}\hat{\Sigma}_{22}^{-1/2} $$
that is, the trace of the matrix square root of $T'T$:
$$corr(H_1,H_2) = \mathrm{tr}\left((T'T)^{1/2}\right) $$

In [10]:
#Function that performs matrix square root
#we perform the sqrt root by doing a svd
def matrix_sqrt(X):
    """Return u * diag(sqrt(s)) * v' built from the SVD X = u * diag(s) * v'.

    For a symmetric positive semi-definite X (the case for the covariance
    matrices and T'T in this notebook) u and v coincide and the result is
    the matrix square root of X.

    Args:
        X: square matrix tensor.

    Returns:
        Matrix tensor with the same shape as X.
    """
    # s holds the singular values, u the left and v the right singular vectors
    s, u, v = tf.svd(X, compute_uv=True, full_matrices=True)
    # tf.diag turns the vector of square-rooted singular values into a
    # diagonal matrix (there is no tf.diagonal op).
    sqrt_s = tf.diag(tf.sqrt(s))
    return tf.matmul(tf.matmul(u, sqrt_s), tf.transpose(v))

In [11]:
# Sigma^{-1/2} for each view: take the matrix square root, then invert it
sigma11_sqrtinv = tf.matrix_inverse(matrix_sqrt(sigma11))
sigma22_sqrtinv = tf.matrix_inverse(matrix_sqrt(sigma22))

# T = Sigma11^{-1/2} * Sigma12 * Sigma22^{-1/2}
T = tf.matmul(
    tf.matmul(sigma11_sqrtinv, sigma12),
    sigma22_sqrtinv
)

AttributeError: 'module' object has no attribute 'svd'

In [None]:
# Total correlation: the trace of the matrix square root of T'T
corr = tf.trace(
    matrix_sqrt(tf.matmul(tf.transpose(T), T))
)

In [None]:
# We train using the Adam optimiser. The paper suggests using full batch
# L-BFGS
# The goal is to *maximise* the total correlation; the optimiser can only
# minimise, so it is given the negated correlation as its loss.
tran_step = tf.train.AdamOptimizer(learning_rate = 0.5).minimize(-corr)

In [None]:
# Load a compiled custom-op library from a machine-specific absolute path.
# NOTE(review): judging by the file name this is presumably a custom SVD op
# meant to stand in for the missing tf.svd -- confirm.
test = tf.load_op_library(
    '/home/hngu4068/Documents/tensorflow/svd_op.so'
)

In [None]:
# Load a second compiled custom-op library from a machine-specific absolute
# path; this rebinds the name `test`.
test = tf.load_op_library(
    '/home/hngu4068/Documents/zero_out.so'
)