In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [34]:
"""

parameters
------

n_input: int
    number of units in the input layer; corresponds to the ingredient choice set

n_output: int
    number of units of output layer, the essential part of a training sampler, {N: the input ingredients, M: the output ingredient}

n_hidden: int
    number of hidden units
    
kk: int
    number of steps within contrastive divergence
    
momentum: bool
    whether to use momentum
    
GB: bool
    if true, use a Gaussian-Bernoulli (real-valued input and output units) model, otherwise use a Bernoulli-Bernoulli (binary) model
"""


class RBM:
    """Bernoulli-Bernoulli restricted Boltzmann machine trained with CD-k.

    Written against the TensorFlow 1.x graph API: the methods only build ops,
    and the caller is responsible for running them inside a ``tf.Session``.

    Parameters
    ----------
    n_input : int
        Number of input units (the ingredient choice set).
    n_output : int
        Number of output units. Input and output units together form the
        visible layer of ``n_input + n_output`` units.
    n_hidden : int
        Number of hidden units.
    kk : int
        Number of Gibbs steps per contrastive-divergence estimate; must be >= 1.
    momentum : bool
        If true, ``self.momentum`` is a scalar float placeholder that has to be
        fed when running the op returned by ``train_step``; otherwise it is the
        constant ``0.0``.
    GB : bool
        Stored as ``self.GB``. Only the Bernoulli-Bernoulli model is
        implemented; the flag does not change any computation yet.
    lr : float
        Learning rate applied by ``train_step``.

    Raises
    ------
    ValueError
        If ``kk`` is smaller than 1.
    """

    def __init__(self, n_input, n_output, n_hidden, kk = 1, momentum = False, GB = False, lr = 0.01):
        if kk < 1:
            raise ValueError("kk must be at least 1, got %r" % (kk,))

        self.n_input = n_input
        self.n_output = n_output
        self.n_visible = n_input + n_output
        self.n_hidden = n_hidden
        self.kk = kk
        self.GB = GB
        self.lr = lr

        # weights and biases
        self.ww = tf.Variable(
            tf.random_uniform([self.n_visible, n_hidden], minval = 0.0, maxval = 1.0, dtype = tf.float32),
            name = "visible_hidden_weights")
        self.vb = tf.Variable(tf.zeros([self.n_visible]) + 0.1, dtype = tf.float32, name = "visible_biases")
        self.hb = tf.Variable(tf.zeros([n_hidden]) + 0.1, dtype = tf.float32, name = "hidden_biases")

        # update velocities (only change the result when momentum is non-zero)
        self.ww_v = tf.Variable(tf.zeros([self.n_visible, n_hidden]), dtype = tf.float32, name = "weight_speed")
        self.vb_v = tf.Variable(tf.zeros([self.n_visible]), dtype = tf.float32, name = "visible_bias_speed")
        self.hb_v = tf.Variable(tf.zeros([n_hidden]), dtype = tf.float32, name = "hidden_bias_speed")

        # Momentum method: a placeholder lets the caller schedule the coefficient.
        if momentum:
            self.momentum = tf.placeholder(tf.float32)
        else:
            self.momentum = 0.0

        # Visible input; float32 because tf.matmul needs the same dtype as the weights.
        self.vv = tf.placeholder(tf.float32, shape = [None, self.n_visible])

    def train_step(self, train_data):
        """Build the op that applies one CD-k update to weights and biases.

        velocity  <- lr * gradient + momentum * velocity
        parameter <- parameter + velocity

        ``train_data`` is a float tensor with one visible vector per row.
        Returns a single grouped op; run it in a session to perform the update.
        """
        dw, dvb, dhb = self.CD(train_data)

        new_ww_v = self.lr * dw + self.momentum * self.ww_v
        new_vb_v = self.lr * dvb + self.momentum * self.vb_v
        new_hb_v = self.lr * dhb + self.momentum * self.hb_v

        return tf.group(
            tf.assign(self.ww_v, new_ww_v),
            tf.assign(self.vb_v, new_vb_v),
            tf.assign(self.hb_v, new_hb_v),
            tf.assign_add(self.ww, new_ww_v),
            tf.assign_add(self.vb, new_vb_v),
            tf.assign_add(self.hb, new_hb_v)
        )

    def CD(self, vv):
        """Contrastive divergence with ``self.kk`` Gibbs steps.

        Returns ``(dw, dvb, dhb)``: batch-averaged gradient estimates with the
        same shapes as ``self.ww``, ``self.vb`` and ``self.hb``.
        """
        hh_prob = self.hh_prob_given_vv(vv)
        new_hh = self.sample_hh(hh_prob)

        # Each step continues from the previous hidden sample so the chain
        # really advances kk times.
        for _ in range(self.kk):
            new_vv_prob = self.vv_prob_given_hh(new_hh)
            new_vv = self.sample_vv(new_vv_prob)
            new_hh_prob = self.hh_prob_given_vv(new_vv)
            new_hh = self.sample_hh(new_hh_prob)

        n_rows = tf.cast(tf.shape(vv)[0], tf.float32)

        # positive statistics v^T h(v), shape [n_visible, n_hidden]
        pos_div = tf.matmul(tf.transpose(vv), hh_prob)
        # negative statistics v'^T h(v')
        neg_div = tf.matmul(tf.transpose(new_vv), new_hh_prob)

        # approximate the gradients, averaged over the rows of the batch
        dw = (pos_div - neg_div) / n_rows
        dvb = tf.reduce_mean(vv - new_vv, 0)
        dhb = tf.reduce_mean(hh_prob - new_hh_prob, 0)
        return dw, dvb, dhb

    def hh_prob_given_vv(self, vv):
        """Return P(h = 1 | v) = sigmoid(v . W + hidden_biases)."""
        return tf.nn.sigmoid(tf.matmul(vv, self.ww) + self.hb)

    def vv_prob_given_hh(self, hh):
        """Return P(v = 1 | h) = sigmoid(h . W^T + visible_biases)."""
        return tf.nn.sigmoid(tf.matmul(hh, tf.transpose(self.ww)) + self.vb)

    def _bernoulli(self, prob):
        """Draw a 0/1 float tensor whose entries are 1 with probability ``prob``."""
        return tf.cast(tf.less(tf.random_uniform(tf.shape(prob)), prob), tf.float32)

    def sample_hh(self, vv_prob):
        """Sample binary hidden states from the given activation probabilities."""
        return self._bernoulli(vv_prob)

    def sample_vv(self, hh_prob):
        """Sample binary visible states from the given activation probabilities."""
        return self._bernoulli(hh_prob)

    def free_energy(self, visible):
        """Return the free energy of each row of ``visible``.

        F(v) = - v . visible_biases - sum_j softplus((v . W)_j + hidden_biases_j)
        """
        vb_term = tf.reduce_sum(visible * self.vb, axis = 1)
        hidden_term = tf.reduce_sum(tf.nn.softplus(tf.matmul(visible, self.ww) + self.hb), axis = 1)
        return - vb_term - hidden_term

    def pseudo_log_likelihood(self, visible):
        """Placeholder for a pseudo-log-likelihood monitor; not implemented, returns None."""
        pass
    

In [4]:
"""

parameters
------

n_choice: int
    number of units in the choice set layer; corresponds to the ingredient choice set

n_selected: int
    number of units of output layer, the essential part of a training sampler, {N: the input ingredients, M: the output ingredient}

n_hidden: int
    number of hidden units
    
n_flavor: int
    number of flavor compounds, also number of features
    
n_batch: int
    mini-batch size for each PCD
    
kk: int
    usually same as n_batch
    
choice_set: {n_ingredient, n_flavor}, contains ingredients and their flavor compound information

recipes: {n_sample, n_ingredient, n_flavor}, n recipes, each contains their ingredients 
    
GB: bool
    if true, use a Gaussian-Bernoulli (real-valued input and output units) model, otherwise use a Bernoulli-Bernoulli (binary) model
"""

class B_DRBM:
    """Bernoulli RBM with separate input and output visible groups, trained with PCD.

    Written against the TensorFlow 1.x graph API: methods build ops and the
    caller runs them in a ``tf.Session``. The visible layer is the
    concatenation of ``n_choice`` input units (no trainable bias) and
    ``n_selected`` output units.

    Parameters
    ----------
    choice_set, recipes, flavors
        Accepted for interface compatibility; the constructor does not use
        them. Data is passed to ``train`` instead.
    n_choice : int
        Number of input (choice set) visible units.
    n_selected : int
        Number of output visible units.
    n_hidden : int
        Number of hidden units.
    n_flavor : int
        Number of flavor compounds (features per ingredient).
    lr : float
        Learning rate applied by ``update``.
    kk : int
        Number of Gibbs steps of the negative chain in ``PCD``; must be >= 1.
    n_batch : int
        Mini-batch size used by ``train``; must be >= 1.
    momentum : bool
        Stored as ``self.momentum``; no momentum update is implemented yet.

    Raises
    ------
    ValueError
        If ``kk`` or ``n_batch`` is smaller than 1.
    """

    def __init__(self, choice_set, recipes, flavors, n_choice, n_selected, n_hidden, n_flavor, lr, kk = 1, n_batch = 1, momentum = True):
        if kk < 1:
            raise ValueError("kk must be at least 1, got %r" % (kk,))
        if n_batch < 1:
            raise ValueError("n_batch must be at least 1, got %r" % (n_batch,))

        self.n_choice = n_choice
        self.n_selected = n_selected
        self.n_hidden = n_hidden
        self.n_flavor = n_flavor
        self.n_batch = n_batch
        self.kk = kk
        self.lr = lr
        self.momentum = momentum

        # weights and biases
        self.w_in = tf.Variable(tf.random_normal([n_choice, n_hidden], mean=0.0, stddev=1.0), name = "input_hidden_weight")
        self.w_out = tf.Variable(tf.truncated_normal([n_selected, n_hidden], mean=0.0, stddev=1.0), name = "output_hidden_weight")
        self.b_h = tf.Variable(tf.zeros([self.n_hidden]) + 0.1, dtype = tf.float32, name = "hidden_bias")
        # The input group has no trainable bias: a fixed zero tensor keeps the
        # concatenated visible bias the right length.
        self.b_in = tf.zeros([self.n_choice], dtype = tf.float32, name = "None_input_bias")
        self.b_out = tf.Variable(tf.zeros([self.n_selected]) + 0.1, dtype = tf.float32, name = "output_bias")

    def get_mean_choice(self, choice_set):
        """Average ``choice_set`` over its first axis (one mean feature vector)."""
        return tf.reduce_mean(choice_set, 0)

    def get_mean_recipes(self, recipes):
        """Average ``recipes`` over its second axis (one mean feature vector per recipe)."""
        return tf.reduce_mean(recipes, 1)

    def train(self, choice_set, recipes):
        """Build the parameter-update ops for every sample of every full mini-batch.

        parameters
        ---------------------
        choice_set: [n_ingr, n_flavor]
            all ingredients and their flavor features
        recipes: [n_sample, n_ingr, n_flavor]
            multiple recipes for the training step; the first dimension must be
            statically known (NumPy array, or tensor with a fixed leading size)

        Returns the list of update ops in sample order. Samples beyond the last
        full mini-batch are ignored.
        """
        n_samples = int(recipes.shape[0])
        n_batches = n_samples // self.n_batch
        zz = tf.cast(self.get_mean_choice(choice_set), tf.float32)

        update_ops = []
        for ii in range(n_batches):
            start = ii * self.n_batch
            batch = recipes[start:start + self.n_batch]
            xxs = tf.cast(self.get_mean_recipes(batch), tf.float32)

            # NOTE(review): the choice-set mean and the recipe mean are added
            # element-wise, so the visible width equals their feature width; it
            # must equal n_choice + n_selected for the matmul in PCD - confirm.
            # Row slices (j:j+1) keep the samples 2-D, as tf.matmul requires.
            last_v_sample = zz + xxs[0:1, :]

            for jj in range(self.n_batch):
                v_sample = zz + xxs[jj:jj + 1, :]
                dw_in, dw_out, db_h, db_out, last_v_sample = self.PCD(v_sample, last_v_sample)
                update_ops.append(self.update(dw_in, dw_out, db_h, db_out))

        return update_ops

    def PCD(self, v_sample, last_v_sample):
        """Persistent contrastive divergence for one data sample.

        The positive statistics come from the data ``v_sample``; the negative
        chain starts from ``last_v_sample`` and runs ``self.kk`` Gibbs steps.

        Returns ``(dw_in, dw_out, db_h, db_out, last_v_sample)`` where the last
        element is the final visible sample of the chain.
        """
        concate_weight = tf.concat([self.w_in, self.w_out], 0)
        concate_v_bias = tf.concat([self.b_in, self.b_out], 0)

        # positive phase: h(v) for the data sample
        h_prob = self.h_prob_given_v(v_sample, concate_weight, self.b_h)

        # Markov chain starts from the visibles of the latest Gibbs sample
        new_v_sample = last_v_sample
        for _ in range(self.kk):
            chain_h_prob = self.h_prob_given_v(new_v_sample, concate_weight, self.b_h)
            chain_h_sample = self.sample_h(chain_h_prob)
            new_v_prob = self.v_prob_given_h(chain_h_sample, concate_weight, concate_v_bias)
            new_v_sample = self.sample_v(new_v_prob)
        new_h_prob = self.h_prob_given_v(new_v_sample, concate_weight, self.b_h)

        # positive statistics v^T h(v) and negative statistics v'^T h(v'),
        # both [n_visible, n_hidden] so they can be split like the weights
        n_rows = tf.cast(tf.shape(v_sample)[0], tf.float32)
        pos_div = tf.matmul(tf.transpose(v_sample), h_prob)
        neg_div = tf.matmul(tf.transpose(new_v_sample), new_h_prob)

        dw = (pos_div - neg_div) / n_rows
        db_h = tf.reduce_mean(h_prob - new_h_prob, 0)
        db_v = tf.reduce_mean(v_sample - new_v_sample, 0)

        dw_in, dw_out = tf.split(dw, [self.n_choice, self.n_selected], 0) # split input and output weights
        _, db_out = tf.split(db_v, [self.n_choice, self.n_selected], 0) # input bias is fixed, only keep the output part

        return dw_in, dw_out, db_h, db_out, new_v_sample

    @staticmethod
    def h_prob_given_v(v_sample, concate_weight, h_bias):
        """Return sigmoid(visibles . weights + hidden_bias)."""
        return tf.nn.sigmoid(tf.matmul(v_sample, concate_weight) + h_bias)

    @staticmethod
    def v_prob_given_h(h_sample, concate_weight, concate_v_bias):
        """Return sigmoid(hiddens . weights^T + visible_bias)."""
        return tf.nn.sigmoid(tf.matmul(h_sample, tf.transpose(concate_weight)) + concate_v_bias)

    @staticmethod
    def _bernoulli(prob):
        """Draw a 0/1 float tensor whose entries are 1 with probability ``prob``."""
        return tf.cast(tf.less(tf.random_uniform(tf.shape(prob)), prob), tf.float32)

    @staticmethod
    def sample_h(v_prob):
        """Sample binary hidden states; ``v_prob`` holds the hidden activation probabilities."""
        return B_DRBM._bernoulli(v_prob)

    @staticmethod
    def sample_v(h_prob):
        """Sample binary visible states; ``h_prob`` holds the visible activation probabilities."""
        return B_DRBM._bernoulli(h_prob)

    def update(self, dw_in, dw_out, db_h, db_out):
        """Return one grouped op adding ``lr * gradient`` to each trainable parameter."""
        return tf.group(
            tf.assign_add(self.w_in, self.lr * dw_in),
            tf.assign_add(self.w_out, self.lr * dw_out),
            tf.assign_add(self.b_h, self.lr * db_h),
            tf.assign_add(self.b_out, self.lr * db_out)
        )

    def free_energy(self, v_sample):
        """Return the free energy of each row of ``v_sample``.

        F(v) = - v . visible_bias - sum_j softplus((v . W)_j + hidden_bias_j)
        """
        ww = tf.concat([self.w_in, self.w_out], 0)
        v_b = tf.concat([self.b_in, self.b_out], 0)

        vb_term = - tf.reduce_sum(v_sample * v_b, axis = 1)
        hidden_term = tf.reduce_sum(tf.nn.softplus(tf.matmul(v_sample, ww) + self.b_h), axis = 1)
        return vb_term - hidden_term

    def pseudo_llh(self, v_sample):
        """Placeholder for the pseudo-log-likelihood monitor; not implemented, returns None."""
        pass
    

In [4]:
# test data: a 50-row sample of the recipe table and the full ingredient matrix, as NumPy arrays.
# Forward slashes keep the relative paths valid on Windows as well as on POSIX systems.
data = np.array(pd.read_csv("data/test_recip_ingr.csv", nrows = 50))
ingr_flavor = np.array(pd.read_csv("data/matrix_ingr_comp.csv"))

In [None]:
# Toy dimensions for experimenting with placeholder shapes.
n_choice = 1
n_flavor = 2

# `self` only exists inside methods, so the placeholders are bound to plain
# module-level names here.
choice_set_ph = tf.placeholder(tf.float32, shape=[n_choice, n_flavor])
recipes_ph = tf.placeholder(tf.float32, shape=[None, n_choice, n_flavor])

In [22]:
# Check the dimensions of the array loaded from matrix_ingr_comp.csv.
ingr_flavor.shape

(1530, 1107)

In [13]:
# Small 1-D constant used to try out tf.slice in the next cell.
s = tf.constant([1,2,3])

In [23]:
# tf.slice(input, begin, size): start at index 1 of `s` and take 2 elements.
a = tf.slice(s, [1], [2])

In [6]:
# Slice experiment on the 2-D `data` array. `self` is not available at notebook
# top level, so module-level names are used, and tf.slice needs one begin entry
# and one size entry per axis.
n_batch = 1
batch = tf.slice(data, [0 * n_batch, 0], [n_choice, n_flavor])

In [27]:
# Two integer variables with 2 rows and 3 columns each, used to try out tf.concat.
# Note that this rebinds `a`, which an earlier cell used for a slice result.
a = tf.Variable([[1,2,3], [4,5,6]])
b = tf.Variable([[7,8,9], [10,11,12]])

In [28]:
# Concatenate along axis 0: the rows of `b` are stacked beneath the rows of `a`.
c = tf.concat([a,b], 0)

In [29]:
# Evaluate `c` in a session; the variables it reads are initialised first.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(c))

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [24]:
# NOTE(review): `recipes` is not assigned in any visible cell; presumably it is
# left over from earlier kernel state - confirm before relying on this cell.
recipes.shape

(50, 1530)