# TensorBayes

### Adaptation of `BayesC.cpp`



## Imports

In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np

tfd = tfp.distributions

## File input

To do

In [2]:
# Get the numbers of columns in the csv:
# File I/O here 


# filenames = ""

# csv_in = open(filenames, "r")                        # open the csv
# ncol = len(csv_in.readline().split(","))            # read the first line and count the # of columns
# csv_in.close()                                      # close the csv
# print("Number of columns in the csv: " + str(ncol)) # print the # of columns

FileNotFoundError: [Errno 2] No such file or directory: 'ex.csv'

## Reproducibility

Seed setting for reproducible research.

In [80]:
# NumPy and TensorFlow keep separate random number generators, so both are
# seeded here: NumPy is used for the simulated data and the marker permutation,
# TensorFlow for every draw made through the `tfd` distributions.

# Each distribution's sample() seen below can additionally be seeded on its own,
# ex. dist.sample(seed=32): return a sample of shape=() (scalar).

# Seed NumPy's global generator
np.random.seed(1234)

# Set graph-level seed
tf.set_random_seed(1234)

## Distributions functions

- Random Uniform:   
returns a sample from a uniform distribution with limits `lower` and `higher`.
   
   
- Random Normal:   
returns a sample from a normal distribution with parameters `mean` and `standard deviation`.
   
   
- Random Beta:   
returns a random quantile of a beta distribution with parameters `alpha` and `beta`.
   
   
- Random Inversed Chi$^2$:   
returns a random quantile of an inverse chi$^2$ distribution with parameters `degrees of freedom` and `scale`.
   
   
- Random Bernoulli:   
returns a sample from a Bernoulli distribution with probability of success `p`.
   
   

In [9]:
# Note: written as a translation of BayesC.cpp.
# The function definitions might not be needed;
# declaring the distributions directly could be enough.

def runif(lower, higher):
    """Draw from a uniform distribution bounded by `lower` and `higher`.

    Args:
        lower: lower limit passed to `tfd.Uniform`.
        higher: upper limit passed to `tfd.Uniform`.

    Returns:
        The result of a single `sample()` call on that distribution.
    """
    return tfd.Uniform(lower, higher).sample()

def rnorm(mean, sd):
    """Draw from a normal distribution.

    Args:
        mean: location of the distribution.
        sd: scale of the distribution, i.e. a standard deviation (not a
            variance).

    Returns:
        The result of a single `sample()` call on that distribution.
    """
    return tfd.Normal(loc=mean, scale=sd).sample()

def rbeta(alpha, beta):
    """Draw from a beta distribution.

    Args:
        alpha: first shape parameter, passed first to `tfd.Beta`.
        beta: second shape parameter, passed second to `tfd.Beta`.

    Returns:
        The result of a single `sample()` call on that distribution.
    """
    return tfd.Beta(alpha, beta).sample()

def rinvchisq(df, scale):
    """Draw from a scaled inverse chi-squared distribution.

    The draw is obtained from an inverse gamma distribution whose two
    parameters are `df / 2` and `df * scale / 2`.

    Args:
        df: degrees of freedom.
        scale: scale parameter.

    Returns:
        The result of a single `sample()` call on that distribution.
    """
    half_df = df*0.5
    half_df_scale = df*scale*0.5
    return tfd.InverseGamma(half_df, half_df_scale).sample()

def rbernoulli(p):
    """Draw from a Bernoulli distribution.

    Args:
        p: probability of success, passed as `probs`.

    Returns:
        The result of a single `sample()` call on that distribution.
    """
    return tfd.Bernoulli(probs=p).sample()


## Sampling functions

- Sampling of the mean   
     
    
- Sampling of the variance of beta   
   
   
- Sampling of the error variance of Y   
   
   
- Sampling of the mixture weight   
   
   

In [4]:
# sample mean
def sample_mu(N, Esigma2, Y, X, beta):
    """Sample the mean (as in BayesC, with the N parameter).

    The draw is normal, centred on the sum of `Y - X.beta` divided by `N`,
    with standard deviation `sqrt(Esigma2 / N)`.

    Args:
        N: number of observations used as divisor.
        Esigma2: current error variance.
        Y: response tensor.
        X: design matrix tensor.
        beta: effect tensor, multiplied as `tf.matmul(X, beta)`.

    Returns:
        The sampled mean, as returned by `rnorm`.
    """
    residual_total = tf.reduce_sum(tf.subtract(Y, tf.matmul(X, beta)))
    return rnorm(residual_total/N, tf.sqrt(Esigma2/N))

# sample variance of beta
def sample_psi2_chisq(beta, NZ, v0B, s0B):
    """Sample the variance of beta from a scaled inverse chi-squared.

    Args:
        beta: effect tensor; its squared norm enters the scale.
        NZ: number of markers currently included in the model.
        v0B: prior degrees of freedom.
        s0B: prior scale.

    Returns:
        The sampled variance, as returned by `rinvchisq`.
    """
    posterior_df = v0B+NZ
    # tf.nn.l2_loss is half the sum of squares, hence the factor 2.
    beta_sq_norm = tf.nn.l2_loss(beta)*2
    # NOTE(review): the squared norm is multiplied by NZ as written here;
    # confirm against BayesC.cpp that this is intended.
    weighted_norm = beta_sq_norm*NZ
    prior_term = v0B*s0B
    numerator = weighted_norm+prior_term
    denominator = v0B+NZ
    posterior_scale = numerator/denominator
    return rinvchisq(posterior_df, posterior_scale)


# sample error variance of Y
def sample_sigma_chisq(N, epsilon, v0E, s0E):
    """Sample the error variance of Y from a scaled inverse chi-squared.

    Args:
        N: number of observations.
        epsilon: residual tensor; its squared norm enters the scale.
        v0E: prior degrees of freedom.
        s0E: prior scale.

    Returns:
        The sampled variance, as returned by `rinvchisq`.
    """
    posterior_df = v0E+N
    # tf.nn.l2_loss is half the sum of squares, hence the factor 2.
    residual_sq_norm = tf.nn.l2_loss(epsilon)*2
    numerator = residual_sq_norm+v0E*s0E
    posterior_scale = numerator/(v0E+N)
    return rinvchisq(posterior_df, posterior_scale)


# sample mixture weight
def sample_w(M, NZ):
    """Sample the mixture weight from a beta distribution.

    Args:
        M: total number of markers.
        NZ: number of markers currently included in the model.

    Returns:
        A draw from `rbeta(1 + NZ, 1 + (M - NZ))`.
    """
    first_shape = 1+NZ
    second_shape = 1+(M-NZ)
    return rbeta(first_shape, second_shape)


    
    
    

In [81]:
## Simulate data

def build_toy_dataset(N, beta, sigmaY_true=1):
    """Simulate a linear-regression dataset with Gaussian noise.

    Args:
        N: number of observations (rows) to simulate.
        beta: true effects; its length sets the number of features.
        sigmaY_true: standard deviation of the noise added to the response.

    Returns:
        A tuple `(x, y)`: `x` is an `N x len(beta)` array of standard-normal
        draws and `y` is `x . beta` plus zero-mean normal noise of length `N`.
    """
    n_features = len(beta)
    # The design matrix is drawn before the noise, so a given NumPy seed
    # always produces the same pair.
    design = np.random.randn(N, n_features)
    noise = np.random.normal(0, sigmaY_true, size=N)
    response = np.dot(design, beta) + noise
    return design, response

# Problem size.
N = 40  # number of data points
M = 10  # number of features

# True effects and the simulated design matrix / response (NumPy arrays).
beta_true = np.random.randn(M)
x, y = build_toy_dataset(N, beta_true)

# Graph constants holding the data: X is N x M, Y is an N x 1 column.
X = tf.constant(x, dtype=tf.float32, shape=[N, M])
Y = tf.constant(y, dtype=tf.float32, shape=[N, 1])

# Random order of the M marker indices, iterated over by the Gibbs loop.
index = np.random.permutation(M)

# Could be implemented:
# building datasets using TF API without numpy

## Parameters setup

In [82]:
# Distinction between constant and variables
# Variables: values might change between evaluation of the graph
# (if something changes within the graph, it should be a variable)

# Sampler state. None of the variables is trainable: they are only changed
# through explicit assign ops.
Emu = tf.Variable(0., trainable=False)  # mean, starts at 0
vEmu = tf.ones([N,1])  # N x 1 column of ones, multiplied by Emu when the residuals are recomputed
Ebeta = np.zeros([M,1])  # NumPy-side marker effects, M x 1, start at 0
Ebeta_ = tf.Variable(Ebeta, dtype=tf.float32, trainable=False)  # graph-side copy of Ebeta
ny = np.zeros([M,1])  # NumPy-side inclusion indicators, M x 1, filled with Bernoulli draws by the Gibbs loop
Ew = tf.Variable(0., trainable=False)  # mixture weight, starts at 0
epsilon = tf.Variable(Y, trainable=False)  # residuals, initialised with Y
NZ = tf.Variable(0., trainable=False)  # number of included markers (assigned from the sum of ny)
# Error variance: starts at l2_loss(initial residuals) / N, where tf.nn.l2_loss is half the sum of squares.
Esigma2 = tf.Variable(tf.nn.l2_loss(epsilon.initialized_value())/N, trainable=False)
# Variance of beta: starts at a draw from rbeta(1, 1).
Epsi2 = tf.Variable(rbeta(1.,1.), trainable=False)

In [83]:
#Standard parameterization of hyperpriors for variances
#double v0E=0.001,s0E=0.001,v0B=0.001,s0B=0.001;

#Alternative parameterization of hyperpriors for variances
# Prior degrees of freedom are floats so that (v0 - 2) / v0 is a true division
# on every interpreter; with ints, Python 2 would evaluate (4 - 2) / 4 to 0 and
# silently zero both prior scales.
v0E, v0B = 4., 4.
# NOTE(review): s0B and s0E are built from the Epsi2 / Esigma2 variables
# themselves, so they follow the current value of those variables each time
# they are evaluated. Confirm this is intended rather than a fixed scale taken
# from the initial values.
s0B=((v0B-2)/v0B)*Epsi2
s0E=((v0E-2)/v0E)*Esigma2

## Tensorboard graph

In [84]:
# Record the default graph in an event file in the current directory so it
# can be inspected with TensorBoard.
current_graph = tf.get_default_graph()
writer = tf.summary.FileWriter('.')
writer.add_graph(current_graph)

## Gibbs sampling

In [85]:
# Open the session used to initialise the variables and run the update ops
sess = tf.Session()

In [86]:
# Initialize variables: build the op that initialises every global variable
# in the graph, then run it once in the session
init = tf.global_variables_initializer()
sess.run(init)

In [93]:
num_iter = 5000  # number of outer iterations of the Gibbs loop

In [88]:
# update ops
u_epsilon_add = epsilon.assign(tf.add(epsilon, tf.reshape(X[:,marker]*Ebeta[marker],[N,1])))
u_epsilon_sub = epsilon.assign(tf.subtract(epsilon, tf.reshape(X[:,marker]*Ebeta[0],[N,1])))
u_Ebeta_ = Ebeta_.assign(Ebeta)
u_epsilon = epsilon.assign(Y-tf.matmul(X,Ebeta_)-vEmu*Emu)
u_Emu = Emu.assign(sample_mu(N, Esigma2, Y, X, Ebeta_))
u_NZ = NZ.assign(np.sum(ny))
u_Ew = Ew.assign(sample_w(M,NZ))
u_epsi2 = Epsi2.assign(sample_psi2_chisq(Ebeta_,NZ,v0B,s0B))
u_Esigma2 = Esigma2.assign(sample_sigma_chisq(N,epsilon,v0E,s0E))


In [None]:
for i in range(num_iter):
    sess.run(u_Emu)
    for marker in index:
        sess.run(u_epsilon_add)
        Cj=tf.nn.l2_loss(X[:,marker])*2+Esigma2/Epsi2 #adjusted variance
        rj= tf.matmul(tf.reshape(X[:,marker], [1,N]),tf.reshape(epsilon, [N,1])) # mean
        ratio=((tf.exp(-(tf.pow(rj,2))/(2*Cj*Esigma2))*tf.sqrt((Epsi2*Cj)/Esigma2)))
        ratio=Ew/(Ew+ratio*(1-Ew))

        ny[marker] = sess.run(rbernoulli(ratio))

        if (ny[marker]==0):
            Ebeta[marker]=0

        elif (ny[marker]==1):
            Ebeta[marker]=sess.run(rnorm(rj/Cj,Esigma2/Cj))

        sess.run(u_epsilon_sub)

    #for i in range(len(Ebeta)):
    #    print(Ebeta[i], "\t", ny[i])
    sess.run(u_Ebeta_)
    sess.run(u_NZ)
    sess.run(u_Ew)
    sess.run(u_epsilon)
    sess.run(u_epsi2)
    sess.run(u_Esigma2)

In [None]:
# One line per marker: sampled effect, inclusion indicator, simulated true effect.
print("\t".join(["Ebeta", " ny", " beta_true"]))
for row in range(M):
    estimate, included, truth = Ebeta[row], ny[row], beta_true[row]
    print(estimate, "\t", included, "\t", truth)

In [None]:
import time

# Time one call to main() and report the elapsed seconds.
# time.perf_counter() replaces time.clock(), which was removed in Python 3.8,
# and print is called as a function so the cell also parses on Python 3.
# NOTE(review): `main` is not defined in the visible part of this notebook;
# confirm where it is supposed to come from.
start_time = time.perf_counter()
main()
print(time.perf_counter() - start_time, "seconds")

In [149]:
# Scratch check of assign_add on a scalar variable.
# `v` has to exist before it can be assigned to. AUTO_REUSE lets this cell be
# re-run without get_variable complaining that "v" already exists.
with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
assignment = v.assign_add(1)
# Note: this re-runs the initializer of every global variable in the graph,
# not only the one for `v`.
tf.global_variables_initializer().run(session = sess)
sess.run(assignment)  # or assignment.op.run(), or assignment.eval()


1.0

In [None]:
# WORKING VERSION






# Count the columns from the first line of the csv, so the shuffled column
# queue below covers every column of the file.
csv_path = "ex.csv"
with open(csv_path, "r") as csv_in:
    ncol = len(csv_in.readline().split(","))

# Create random column order list (dataset) + iterator
col_list = tf.data.Dataset.range(ncol).shuffle(buffer_size=ncol)
col_next = col_list.make_one_shot_iterator().get_next()

#def scale_zscore(vector):
#    mean, var = tf.nn.moments(vector, axes=[0])
#    normalized_col = tf.map_fn(lambda x: (x - mean)/tf.sqrt(var), vector)
#    return normalized_col

# Launch of graph
# Note: this rebinds the module-level names `sess` and `index`.
with tf.Session() as sess:

    while True: # Loop on 'col_next', the queue of column iterator
        try:
            index = sess.run(col_next)
        except tf.errors.OutOfRangeError: # Every column has been visited
            break

        dataset = tf.contrib.data.CsvDataset( # Creates a dataset of the current csv column
                    csv_path,
                    [tf.float32],
                    select_cols=[index]  # Only parse the current column
                )
        next_element = dataset.make_one_shot_iterator().get_next() # Creates an iterator
        print('Current column to be full pass: ' + str(index))

        current_col = []
        while True:
            try:
                current_col.append(sess.run(next_element)[0]) # Full pass
            except tf.errors.OutOfRangeError: # End of full pass
                break

        print(current_col)

        # Z-score the column. The values are kept as a rank-1 tensor so the
        # moments are taken across the observations; wrapping the list in
        # another list would give a 1 x n tensor whose variance along axis 0
        # is zero, turning every normalised value into 0/0.
        col_tensor = tf.convert_to_tensor(current_col)
        mean, var = tf.nn.moments(col_tensor, axes=[0])
        normalized_col = (col_tensor - mean)/tf.sqrt(var)
        # Evaluate the tensor so the values are printed, not the Tensor handle.
        print(sess.run(normalized_col))
        print('\n')


