# TensorBayes

### Adaptation of `BayesC.cpp`



## Imports

In [41]:
import tensorflow as tf

In [3]:
import tensorflow_probability as tfp

In [2]:
import numpy as np

In [4]:
tfd = tfp.distributions

## File input

To do

In [2]:
# Get the numbers of columns in the csv:
# File I/O here 
filenames = ""

csv_in = open(filenames, "r")                        # open the csv
ncol = len(csv_in.readline().split(","))            # read the first line and count the # of columns
csv_in.close()                                      # close the csv
print("Number of columns in the csv: " + str(ncol)) # print the # of columns

FileNotFoundError: [Errno 2] No such file or directory: 'ex.csv'

## Reproducibility

Seed setting for reproducable research.

In [105]:
# To do: get a numpy seed or look at how TF implements rng.

# each distributions.sample() seen below can be seedeed.
# ex. dist.sample(seed=32): return a sample of shape=() (scalar).

# Set graph-level seed
tf.set_random_seed(1234)

## Distributions functions

- Random Uniform:   
return a sample from a uniform distribution of limits parameter `lower ` and `higher`.
   
   
- Random Normal:   
return a sample from a normal distribution of parameter `mean` and `standard deviation`.
   
   
- Random Beta:   
return a random quantile of a beta distribution of parameter `alpha` and `beta`.
   
   
- Random Inversed Chi$^2$:   
return a random quantile of a inversed chi$^2$ distribution of parameter `degrees of freedom` and `scale`.
   
   
- Random Bernoulli:   
return a sample from a bernoulli distribution of probability of sucess `p`.
   
   

In [69]:
# Note: written as a translation of BayesC.cpp
# the function definitions might not be needeed,
# and the declarations of the distributions could be enough

def runif(lower, higher):
    dist = tfd.Uniform(lower, higher)
    return dist.sample()

def rnorm(mean, sd):
    dist = tfd.Normal(loc= mean, scale= sd)
    return dist.sample()

def rbeta(alpha, beta):
    dist = tfd.Beta(float(alpha), float(beta))
    return dist.sample()

def rinvchisq(df, scale):
    dist = tfd.InverseGamma(df*0.5, df*scale*0.5)
    return dist.sample()

def rbernoulli(p):
    dist = tfd.Bernoulli(probs=p)
    return dist.sample()


## Sampling functions

- Sampling of the mean   
     
    
- Sampling of the variance of beta   
   
   
- Sampling of the error variance of Y   
   
   
- Sample of the mixture weight   
   
   

In [32]:
# sample mean
def sample_mu(N, Esigma2, Y, X, beta): #as in BayesC, with the N parameter
    mean = tf.reduce_sum(tf.subtract(Y, tf.matmul(X, beta)))/N
    sd = tf.sqrt(Esigma2/N)
    mu = rnorm(mean, sd)
    return mu

# sample variance of beta
def sample_psi2_chisq( beta, NZ, v0B, s0B):
    df=v0B+NZ
    scale=(tf.nn.l2_loss(beta)*2*NZ+v0B*s0B)/(v0B+NZ)
    psi2=rinvchisq(df, scale)
    return(psi2)


# sample error variance of Y
def sample_sigma_chisq( N, epsilon, v0E, s0E):
    sigma2=rinvchisq(v0E+N, (tf.nn.l2_loss(epsilon)*2+v0E*s0E)/(v0E+N))
    return(sigma2)


# sample mixture weight
def sample_w( M, NZ):
    w=rbeta(1+NZ,1+(M-NZ))
    return(w)


    
    
    

## Simulate data

In [97]:
def build_toy_dataset(N, beta, sigmaY_true=1):
    
    features = len(beta)
    x = np.random.randn(N, features)
    y = np.dot(x, beta) + np.random.normal(0, sigmaY_true, size=N)
    return x, y

N = 40  # number of data points
M = 10  # number of features

beta_true = np.random.randn(M)
x, y = build_toy_dataset(N, beta_true)



In [None]:
# Could be implemented:
# building datasets using TF API without numpy



In [50]:
X = tf.constant(x, shape=[N,M], dtype=tf.float32)

In [53]:
Y = tf.constant(y, shape = [N,1], dtype=tf.float32)

In [70]:
bte = rbeta(1,1)

In [71]:
bte

<tf.Tensor 'Beta_6/sample/Reshape:0' shape=() dtype=float32>

## Parameters setup

In [174]:
# Distinction between constant and variables
# Variables: values might change between evaluation of the graph
# (if something changes within the graph, it should be a variable)

Emu = tf.Variable(0., trainable=False)
vEmu = tf.ones([N,1])
Ebeta = tf.zeros([M,1])
ny = tf.zeros(M)
Ew = tf.constant(0.)
epsilon = Y - tf.matmul(X,Ebeta) - vEmu*Emu
NZ = tf.constant([0])

Esigma2 = tf.nn.l2_loss(epsilon)/N
Epsi2 = rbeta(1.,1.)



In [175]:
epsilon

<tf.Tensor 'sub_47:0' shape=(40, 1) dtype=float32>

In [98]:
x

array([[-6.60579988e-01,  7.58302101e-01, -1.46788153e+00,
         3.39468405e-01,  1.16851946e+00, -2.48167299e-01,
        -6.30222325e-02,  6.72205190e-01,  2.44077392e+00,
         3.73492468e-02],
       [-4.49426602e-02, -8.28326650e-01, -7.21325989e-01,
         7.97508664e-01, -3.44951993e-01,  1.50063397e+00,
         1.24184652e+00,  1.64965709e+00, -4.94530112e-01,
         1.31747232e+00],
       [ 7.67430954e-01, -8.22647344e-01,  3.64923218e-01,
        -2.61833708e+00,  6.06119570e-01,  1.01787381e+00,
        -4.75207372e-01,  3.41966199e-01,  3.60929669e-01,
         5.59454253e-01],
       [-1.23792939e+00,  6.29173946e-02, -9.43314026e-01,
         8.86816953e-01, -1.05571162e+00,  1.02948422e+00,
        -9.26553113e-01, -5.32414939e-01,  1.82831924e+00,
         1.19360489e+00],
       [-2.52634278e-01,  1.44455724e+00, -5.17208672e-01,
         1.51671664e+00, -1.04004853e+00,  3.32540043e-01,
         5.40992697e-01,  2.15210146e+00,  1.23437321e+00,
         9.

In [94]:
#Standard parameterization of hyperpriors for variances
#double v0E=0.001,s0E=0.001,v0B=0.001,s0B=0.001;

#Alternative parameterization of hyperpriors for variances
v0E, v0B = 4, 4
s0B=((v0B-2)/v0B)*Epsi2
s0E=((v0E-2)/v0E)*Esigma2

In [102]:
# pre-computed elements for calculations
el1 = tf.matmul(tf.transpose(X),X)

<tf.Tensor 'MatMul_14:0' shape=(10, 10) dtype=float32>

In [182]:
epsilon

<tf.Tensor 'add_12:0' shape=(40,) dtype=float32>

## Tensorboard graph

In [76]:
writer = tf.summary.FileWriter('.')
writer.add_graph(tf.get_default_graph())

## Gibbs sampling

In [126]:
# Open session
sess = tf.Session()

In [194]:
# Initialize variables
init = tf.global_variables_initializer()
sess.run(init)

In [29]:
num_iter = 50

In [195]:
print(sess.run(tf.report_uninitialized_variables()))


[]


In [221]:
#debug for just 1 marker 0
epsilon = tf.add(epsilon, X[:,0]*Ebeta[0])
Cj=tf.nn.l2_loss(X[:,0])*2+Esigma2/Epsi2 #adjusted variance
rj= tf.matmul(tf.reshape(X[:,0], [1,N]),tf.reshape(epsilon, [N,1])) # mean

ratio=((tf.exp(-(tf.pow(rj,2))/(2*Cj*Esigma2))*tf.sqrt((Epsi2*Cj)/Esigma2)))

ratio=Ew/(Ew+ratio*(1-Ew))

In [None]:
if (ny[marker]==0):

    Ebeta[j]=0
elif (ny[j]==1):
    Ebeta[j]=rnorm(rj/Cj,Esigma2/Cj)
    update = epsilon-X.col(j)*Ebeta[j]
    sess.run(tf.assign(epsilon,update)) 

In [121]:
epsilon = tf.multiply(X[:,0],Ebeta[0])
epsilon

<tf.Tensor 'add_6:0' shape=(40, 40) dtype=float32>

In [223]:



b0 = rnorm(rj/Cj,Esigma2/Cj)

In [224]:
b00 = sess.run(b0)
b00

array([[-1.687462]], dtype=float32)

In [215]:
sess.run(Cj)

array([49.47475   ,  8.496997  ,  9.86839   , -1.4946489 ,  9.589608  ,
       10.245047  , -0.40934944, 12.744787  , 21.020695  ,  8.410867  ],
      dtype=float32)

In [211]:
ww = sess.run(rnorm(1.,1.))
ww

1.5368366

In [173]:
update_epsilon = epsilon.assign(tf.add(epsilon, X[:,0] * Ebeta[0]))

ValueError: Dimension 1 in both shapes must be equal, but are 1 and 40. Shapes are [40,1] and [40,40]. for 'Assign' (op: 'Assign') with input shapes: [40,1], [40,40].

In [177]:
mull= X[:,0]*Ebeta[0]
mull

<tf.Tensor 'mul_49:0' shape=(40,) dtype=float32>

In [176]:
epsilon

<tf.Tensor 'sub_47:0' shape=(40, 1) dtype=float32>

In [150]:
epsilon = Y - tf.matmul(X,Ebeta) - vEmu*Emu

In [180]:
ep31 = tf.squeeze(epsilon, axis=1) + mull
ep31

<tf.Tensor 'add_11:0' shape=(40,) dtype=float32>

In [156]:
ep22 = tf.reshape(epsilon, [40])
ep22

<tf.Tensor 'Reshape:0' shape=(40,) dtype=float32>

In [34]:
# actual code
for i in range(num_iter):
    
    Emu = sample_mu(N, Esigma2, Y, X, Ebeta)
    
    for j in range(M): # implement random column
        
        epsilon = epsilon + X[:,j]*Ebeta[j]
        Cj=el1[j]+Esigma2/Epsi2; #adjusted variance
        rj= tf.transpose(X[:,j])*epsilon; # mean
        
        ratio=(((tf.exp(-(tf.pow(rj,2))/(2*Cj*Esigma2))*tf.sqrt((Epsi2*Cj)/Esigma2))))
        ratio=Ew/(Ew+ratio*(1-Ew))
    
        if (ny[marker]==0):
            
            Ebeta[j]=0
        elif (ny[j]==1):
            Ebeta[j]=rnorm(rj/Cj,Esigma2/Cj)
            update = epsilon-X.col(j)*Ebeta[j]
            sess.run(tf.assign(epsilon,update)) 
    for j in range(M):
        print(sess.run(Ebeta[j]))
        print(sess.run(ny[j]))
    Ew=sample_w(M,NZ)
    epsilon=Y-X*Ebeta-vEmu*Emu

    Epsi2=sample_psi2_chisq(Ebeta,NZ,v0B,s0B)
    Esigma2=sample_sigma_chisq(N,epsilon,v0E,s0E)

NameError: name 'Esigma2' is not defined

In [137]:
testConst = tf.ones([5,5])

In [142]:
testConst = tf.constant(5, shape=[5,5])

In [143]:
sess.run(testConst)

array([[5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5]], dtype=int32)

In [144]:
testConst = testConst * 4

In [145]:
sess.run(testConst)

array([[20, 20, 20, 20, 20],
       [20, 20, 20, 20, 20],
       [20, 20, 20, 20, 20],
       [20, 20, 20, 20, 20],
       [20, 20, 20, 20, 20]], dtype=int32)

In [130]:
testVar = tf.Variable(tf.ones([5,5]))

In [134]:
sess.run(testVar)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]], dtype=float32)

In [135]:
testVar = testVar * 3

In [136]:
sess.run(testVar)

array([[3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.]], dtype=float32)

In [146]:
testVar.assign(testVar*2)

AttributeError: 'Tensor' object has no attribute 'assign'

In [149]:
#v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
assignment = v.assign_add(1)
tf.global_variables_initializer().run(session = sess)
sess.run(assignment)  # or assignment.op.run(), or assignment.eval()


1.0

In [None]:
# WORKING VERSION






# Create random column order list (dataset) + iterator
col_list = tf.data.Dataset.range(ncol).shuffle(buffer_size=ncol)
col_next = col_list.make_one_shot_iterator().get_next()

#def scale_zscore(vector):
#    mean, var = tf.nn.moments(vector, axes=[0])
#    normalized_col = tf.map_fn(lambda x: (x - mean)/tf.sqrt(var), vector)
#    return normalized_col

# Launch of graph
with tf.Session() as sess:

    while True: # Loop on 'col_next', the queue of column iterator
        try:
            index = sess.run(col_next)
            dataset = tf.contrib.data.CsvDataset( # Creates a dataset of the current csv column
                        "ex.csv",
                        [tf.float32],
                        select_cols=[index]  # Only parse last three columns
                    )
            next_element = dataset.make_one_shot_iterator().get_next() # Creates an iterator
            print('Current column to be full pass: ' + str(index))
            current_col = []
            while True: 
                try:
                    current_col.append(sess.run(next_element)[0]) # Full pass
                except tf.errors.OutOfRangeError: # End of full pass
                    
                    print(current_col)
                    current_col = tf.convert_to_tensor([current_col])
                    mean, var = tf.nn.moments(current_col, axes=[0])
                    normalized_col = tf.map_fn(lambda x: (x - mean)/tf.sqrt(var), current_col)
                    print(normalized_col)
                    print('\n')
                    
                    break


            

        except tf.errors.OutOfRangeError:
            break


