In [1]:
import tensorflow as tf
import numpy as np
import tensorflow.contrib.eager as tfe

print(tf.__version__)
print(tf.__git_version__)

tfe.enable_eager_execution()

1.6.0
v1.6.0-0-gd2e24b6039


In [2]:
# set data dimensions
K = 3
V = 5
D = 10000
N = 100

# set the seed
np.random.seed(2014)

# beta prior parameters
eta = np.ones(V) * 1e-1

# beta profiles
beta = np.random.dirichlet(alpha=eta, size=K)

# theta prior parameters
alpha = np.ones(K) * 1e-1
# alpha[0] = 10

# document's prior topic allocation
theta = np.random.dirichlet(alpha=alpha, size=D)

# word's topic membership
z = [np.random.choice(K, size=N, replace=True, p=theta[d, :]) for d in range(D)]
z = np.vstack(z)

# actual words and counts
w = [np.array([np.random.choice(V, size=1, p=beta[k,:])[0] for k in z[d, :]]  + list(range(V))) for d in range(D)]
nw = [np.unique(w[d], return_counts=True)[1] for d in range(D)]
nw = np.vstack(nw)
w = np.vstack(w)

nw = tf.convert_to_tensor(nw, dtype=tf.float32)
nw = tfe.Variable(initial_value=tf.transpose(nw),
                 name="nw_vd")
# nw

In [3]:
nw

<tf.Variable 'nw_vd:0' shape=(5, 10000) dtype=float32, numpy=
array([[ 98.,  94.,  95., ...,  61.,  92., 101.],
       [  1.,   3.,   1., ...,   1.,   5.,   1.],
       [  1.,   1.,   1., ...,   1.,   1.,   1.],
       [  4.,   1.,   3., ...,  34.,   2.,   1.],
       [  1.,   6.,   5., ...,   8.,   5.,   1.]], dtype=float32)>

In [4]:
# initialize LDA parameters
def initialize_variables(K, V, D, alpha=1e-1, eta=1e-1, seed=2014):
    """
    Initialize parameters of LDA model returning adequate Tensors.

    args:
    
        K (int): number of LDA components 
        V (int): vocabulary size
        D (int): number of documents
        alpha (float): hyperparameter for theta prior
        eta (float): hyperparameter for beta prior
       
       
    returns:
    
        eta: [V] tensor with prior parameters (alpha) for beta
        lambda: [K, V] tensor with posterior word distribution per class
        phi: [K, V, D] tensor with vocabulary membership per document
        gamma: [K, D] tensor
        
    """
    tf.set_random_seed(seed)
    eta = tfe.Variable(initial_value=tf.ones(V) * eta, 
                       name="eta_v")
    alpha = tfe.Variable(initial_value=tf.ones(K) * alpha, 
                         name="alpha_k")    
    lam = tfe.Variable(initial_value=tf.abs(tf.random_normal(shape=(K, V))), 
                       name="lambda_kv")
    
    phi = tfe.Variable(initial_value=tf.random_normal(shape=(K, V, D)), 
                       name="phi_kvd")
    tf.assign(ref=phi, value=tf.nn.softmax(phi, axis=0))
    
    gamma = tfe.Variable(initial_value=tf.abs(tf.random_normal(shape=(K, D))), 
                        name="gamma_kd")
    
    e_log_beta = tfe.Variable(initial_value=tf.abs(tf.random_normal(shape=(K, V, D))) * .0, 
                        name="e_log_beta_kvd")
    
    e_log_theta = tfe.Variable(initial_value=tf.abs(tf.random_normal(shape=(K, V, D))) * .0, 
                        name="e_log_theta_kvd")
    
    return eta, alpha, lam, phi, gamma, e_log_beta, e_log_theta

# test
eta, alpha, lam, phi, gamma, e_log_beta, e_log_theta = initialize_variables(K, V, D)



In [5]:
# lambda update
def update_lambda(lam, eta, phi, nw):
    
    K = lam.shape.as_list()[0]
    for k in range(K):
        tf.scatter_update(ref=lam, 
                  indices=k, 
                  updates=tf.reduce_sum(tf.multiply(phi[k], nw), axis=1) + eta)
        
    return lam

# test
# update_lambda(lam, eta, phi, nw)
# print(lam)

In [6]:
# gamma update
def update_gamma(gamma, alpha, phi, nw):
    
    K = gamma.shape.as_list()[0]
    for k in range(K):
        tf.scatter_update(ref=gamma, 
                  indices=k, 
                  updates=tf.reduce_sum(tf.multiply(phi[k], nw), axis=0) + alpha[k])

        
    return gamma

tmp = gamma.value()
update_gamma(gamma, alpha, phi, nw)
print(gamma)


<tf.Variable 'gamma_kd:0' shape=(3, 10000) dtype=float32, numpy=
array([[27.037819 , 23.25609  ,  7.463204 , ..., 33.399498 , 12.000949 ,
        42.4766   ],
       [39.71325  ,  7.5656514, 77.13596  , ..., 41.464417 , 80.972755 ,
        20.695469 ],
       [38.548927 , 74.47826  , 20.700846 , ..., 30.436089 , 12.326286 ,
        42.127922 ]], dtype=float32)>


In [7]:
tmp

<tf.Tensor: id=127, shape=(3, 10000), dtype=float32, numpy=
array([[0.856814  , 0.17276734, 2.0289466 , ..., 2.2287955 , 0.6736742 ,
        1.2884287 ],
       [0.12763453, 0.48072174, 0.32087612, ..., 0.89596504, 1.4283997 ,
        1.2293065 ],
       [0.7794944 , 0.09269007, 2.6102738 , ..., 0.2585634 , 0.5823797 ,
        0.7791185 ]], dtype=float32)>

In [8]:
def update_e_log_beta(e_log_beta, lam):
    
    K = lam.shape.as_list()[0]
    for k in range(K):
        tf.scatter_update(ref=e_log_beta,
                  indices=k,
                  updates=tf.tile(tf.expand_dims(tf.digamma(lam[k]) - tf.digamma(tf.reduce_sum(lam[k])), axis=1), multiples=[1, D]))
    
    return e_log_beta

print(e_log_beta)
update_e_log_beta(e_log_beta, lam);
print(e_log_beta)

<tf.Variable 'e_log_beta_kvd:0' shape=(3, 5, 10000) dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)>
<tf.Variable 'e_log_beta_kvd:0' shape=(3, 5, 10000) dtype=float32, numpy=
array([[[ -4.3613567,  -4.3613567,  -4.3613567, ...,  -4.3613567,
          -4.3613567,  -4.3613567],
        [ -1.004497 ,  -1.004497 ,  -1.004497 , ...,  -1.004497 ,
          -1.004497 ,  -1.004497 ],
        [ -3.9851837,  -3.9851837,  -3.98

In [9]:
def update_e_log_theta(e_log_theta, gamma):
    
    tf.assign(ref=e_log_theta, 
              value=tf.tile(tf.expand_dims(tf.digamma(gamma) - tf.digamma(tf.reduce_sum(gamma, axis=0)), axis=1), multiples=[1, V, 1]))

    return e_log_theta

update_e_log_theta(e_log_theta, gamma)

<tf.Variable 'e_log_theta_kvd:0' shape=(3, 5, 10000) dtype=float32, numpy=
array([[[-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ],
        [-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ],
        [-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ],
        [-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ],
        [-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ]],

       [[-0.9830153 , -2.6959803 , -0.312984  , ..., -0.9393289 ,
         -0.2641325 , -1.6464968 ],
        [-0.9830153 , -2.6959803 , -0.312984  , ..., -0.9393289 ,
         -0.2641325 , -1.6464968 ],
        [-0.9830153 , -2.6959803 , -0.312984  , ..., -0.9393289 ,
         -0.2641325 , -1.6464968 ],
        [-0.9830153 , -2.6959803 , -0.312984  , ..., -0.9393289 ,
         -0.2641325 , -1.6464968 ],
     

In [10]:
e_log_theta

<tf.Variable 'e_log_theta_kvd:0' shape=(3, 5, 10000) dtype=float32, numpy=
array([[[-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ],
        [-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ],
        [-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ],
        [-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ],
        [-1.3734272 , -1.5271437 , -2.7105618 , ..., -1.1585617 ,
         -2.209313  , -0.9149213 ]],

       [[-0.9830153 , -2.6959803 , -0.312984  , ..., -0.9393289 ,
         -0.2641325 , -1.6464968 ],
        [-0.9830153 , -2.6959803 , -0.312984  , ..., -0.9393289 ,
         -0.2641325 , -1.6464968 ],
        [-0.9830153 , -2.6959803 , -0.312984  , ..., -0.9393289 ,
         -0.2641325 , -1.6464968 ],
        [-0.9830153 , -2.6959803 , -0.312984  , ..., -0.9393289 ,
         -0.2641325 , -1.6464968 ],
     

In [11]:
e_log_beta

<tf.Variable 'e_log_beta_kvd:0' shape=(3, 5, 10000) dtype=float32, numpy=
array([[[ -4.3613567,  -4.3613567,  -4.3613567, ...,  -4.3613567,
          -4.3613567,  -4.3613567],
        [ -1.004497 ,  -1.004497 ,  -1.004497 , ...,  -1.004497 ,
          -1.004497 ,  -1.004497 ],
        [ -3.9851837,  -3.9851837,  -3.9851837, ...,  -3.9851837,
          -3.9851837,  -3.9851837],
        [ -3.3059347,  -3.3059347,  -3.3059347, ...,  -3.3059347,
          -3.3059347,  -3.3059347],
        [ -2.9363658,  -2.9363658,  -2.9363658, ...,  -2.9363658,
          -2.9363658,  -2.9363658]],

       [[ -2.9444265,  -2.9444265,  -2.9444265, ...,  -2.9444265,
          -2.9444265,  -2.9444265],
        [ -3.2798169,  -3.2798169,  -3.2798169, ...,  -3.2798169,
          -3.2798169,  -3.2798169],
        [ -1.8179834,  -1.8179834,  -1.8179834, ...,  -1.8179834,
          -1.8179834,  -1.8179834],
        [ -0.9550425,  -0.9550425,  -0.9550425, ...,  -0.9550425,
          -0.9550425,  -0.9550425],
      

In [12]:
import time
start = time.time()

def update_phi(e_log_beta, e_log_theta):
    tf.assign(ref=phi, 
              value=e_log_beta + e_log_theta)
    tf.assign(ref=phi, value=tf.nn.softmax(logits=phi, axis=0))
    return phi


update_phi(e_log_beta, e_log_theta)

end = time.time()
print(end - start)
print(phi)

0.012933015823364258
<tf.Variable 'phi_kvd:0' shape=(3, 5, 10000) dtype=float32, numpy=
array([[[1.4095956e-01, 4.3829095e-01, 2.1572903e-02, ...,
         1.6298778e-01, 3.3500813e-02, 3.3506691e-01],
        [4.8282588e-01, 3.2089731e-01, 2.5070122e-01, ...,
         5.8431685e-01, 4.2049536e-01, 5.9336036e-01],
        [7.1916632e-02, 2.6925978e-01, 1.0304886e-02, ...,
         8.4213004e-02, 1.6105130e-02, 1.9222322e-01],
        [3.7234083e-02, 3.6065232e-02, 7.2814878e-03, ...,
         4.8458524e-02, 1.2219872e-02, 7.5708210e-02],
        [2.4752221e-01, 1.3505588e-01, 1.1857255e-01, ...,
         3.3570617e-01, 2.4189721e-01, 3.3157957e-01]],

       [[8.5903984e-01, 5.6170487e-01, 9.7842681e-01, ...,
         8.3701181e-01, 9.6649909e-01, 6.6493213e-01],
        [7.3313750e-02, 1.0246790e-02, 2.8330383e-01, ...,
         7.4765489e-02, 3.0226198e-01, 2.9338680e-02],
        [9.2808187e-01, 7.3072714e-01, 9.8969460e-01, ...,
         9.1578579e-01, 9.8389465e-01, 8.0777389e-01]

In [13]:
nw_kvd = tf.tile(tf.expand_dims(nw / tf.reduce_sum(nw), axis=0), 
                 multiples=[K, 1, 1])
nw_kvd

<tf.Tensor: id=312, shape=(3, 5, 10000), dtype=float32, numpy=
array([[[9.3333336e-05, 8.9523812e-05, 9.0476191e-05, ...,
         5.8095236e-05, 8.7619046e-05, 9.6190473e-05],
        [9.5238096e-07, 2.8571428e-06, 9.5238096e-07, ...,
         9.5238096e-07, 4.7619046e-06, 9.5238096e-07],
        [9.5238096e-07, 9.5238096e-07, 9.5238096e-07, ...,
         9.5238096e-07, 9.5238096e-07, 9.5238096e-07],
        [3.8095238e-06, 9.5238096e-07, 2.8571428e-06, ...,
         3.2380951e-05, 1.9047619e-06, 9.5238096e-07],
        [9.5238096e-07, 5.7142856e-06, 4.7619046e-06, ...,
         7.6190477e-06, 4.7619046e-06, 9.5238096e-07]],

       [[9.3333336e-05, 8.9523812e-05, 9.0476191e-05, ...,
         5.8095236e-05, 8.7619046e-05, 9.6190473e-05],
        [9.5238096e-07, 2.8571428e-06, 9.5238096e-07, ...,
         9.5238096e-07, 4.7619046e-06, 9.5238096e-07],
        [9.5238096e-07, 9.5238096e-07, 9.5238096e-07, ...,
         9.5238096e-07, 9.5238096e-07, 9.5238096e-07],
        [3.8095238e-06,

In [14]:
def elbo(phi, e_log_beta, e_log_theta, nw_kvd):

    A = tf.reduce_sum(nw_kvd * phi * (e_log_beta + e_log_theta - tf.log(phi + 1e-6)))
    
    
    return A.numpy()

elbo(phi, e_log_beta, e_log_theta, nw_kvd)    

-3.105609

In [15]:
seed = 1

In [48]:
seed += 1
eta, alpha, lam, phi, gamma, e_log_beta, e_log_theta = initialize_variables(K, V, D)

prev_elbo = 0.0
next_elbo = 0.0
iter = 0

for i in range(100000):
    
    for j in range(100000):
        # E-Step:
        update_e_log_beta(e_log_beta, lam);
        update_e_log_theta(e_log_theta, gamma);
        update_phi(e_log_theta=e_log_theta, e_log_beta=e_log_beta)
        gamma_prev = gamma.value()
        update_gamma(gamma, alpha, phi, nw)
        
        diff = tf.reduce_mean(tf.abs(gamma_prev - gamma.value()))
        if diff < 1e-6:
            break
    
    # M-Step:
    update_lambda(lam, eta, phi, nw)
    
    
    next_elbo = elbo(phi, e_log_beta, e_log_theta, nw_kvd)
#     next_elbo = 0.0
    print("Iteration:", iter, "ELBO:", next_elbo)
    
    diff = np.abs(next_elbo - prev_elbo)
    if diff < 1e-6:
        print("Converged!")
        break
    else:
        iter += 1
        prev_elbo = next_elbo



KeyboardInterrupt: 

In [17]:
print(np.round((beta), decimals=3))
print(np.transpose(np.round(tf.transpose(lam) / tf.reduce_sum(lam, axis=1), decimals=3)))

[[0.    0.001 0.005 0.992 0.003]
 [0.99  0.001 0.009 0.    0.   ]
 [0.926 0.016 0.001 0.004 0.053]]
[[0.875 0.018 0.014 0.058 0.035]
 [0.629 0.015 0.014 0.315 0.027]
 [0.    0.01  0.014 0.964 0.012]]


In [18]:
# topic term distribution:
topic_term_dist = np.round(np.vstack([(lam / tf.reduce_sum(lam)).numpy()  for lam in lambda_]), decimals=3)
topic_term_dist

# doc_topic_dists :array-like, shape (n_docs, n_topics)
doc_topic_dist = tf.stack([tf.reshape(g_k, shape=(1000, )) for g_k in gamma], axis=1)
doc_topic_dist = doc_topic_dist / tf.reduce_sum(doc_topic_dist, axis=1, keep_dims=True)
doc_topic_dist = doc_topic_dist.numpy()

# doc_lengths :array-like, shape n_docs
doc_len = tf.reduce_sum(nw, axis=1)
doc_len = doc_len.numpy()

# vocab :array-like, shape n_terms
vocab = np.array(list(range(V)))

# term_frequency :array-like, shape n_terms
term_frec = tf.reduce_sum(nw, axis=0)
term_frec = term_frec.numpy()

NameError: name 'lambda_' is not defined

In [19]:
import pandas as pd

In [20]:
data = pd.read_csv("NIPS_1987-2015.csv")

In [57]:
nw = np.array(data.iloc[:, 1:])
nw = nw.astype('float32')
nw = nw.transpose()

In [58]:
nw = tf.convert_to_tensor(nw)
nw = nw[0:100, 0:1000]
nw = tf.transpose(nw)

In [59]:
K = 10
V, D = nw.shape

In [60]:
nw_kvd = tf.tile(tf.expand_dims(nw / tf.reduce_sum(nw), axis=0), 
                 multiples=[K, 1, 1])
nw_kvd

<tf.Tensor: id=177490, shape=(10, 1000, 100), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
   

In [62]:
seed = 1
eta, alpha, lam, phi, gamma, e_log_beta, e_log_theta = initialize_variables(K, V, D)

prev_elbo = 0.0
next_elbo = 0.0
iter = 0

import time
start = time.time()

for i in range(10000):
    
    for j in range(1000000):
        # E-Step:
        update_e_log_beta(e_log_beta, lam);
        update_e_log_theta(e_log_theta, gamma);
        update_phi(e_log_theta=e_log_theta, e_log_beta=e_log_beta)
        gamma_prev = gamma.value()
        update_gamma(gamma, alpha, phi, nw)
        
        diff = tf.reduce_mean(tf.abs(gamma_prev - gamma.value()))
        if diff < 1e-3:
            break
    
    # M-Step:
    update_lambda(lam, eta, phi, nw)
    
    
    next_elbo = elbo(phi, e_log_beta, e_log_theta, nw_kvd)
    print("Iteration:", iter, "ELBO:", next_elbo)
    
    diff = np.abs(next_elbo - prev_elbo)
    if diff < 1e-6:
        print("Converged!")
        break
    else:
        iter += 1
        prev_elbo = next_elbo

        
end = time.time()
print(end - start)


Iteration: 0 ELBO: -7.432144
Iteration: 1 ELBO: -5.433588
Iteration: 2 ELBO: -5.341462
Iteration: 3 ELBO: -5.2919736
Iteration: 4 ELBO: -5.263893
Iteration: 5 ELBO: -5.2484136
Iteration: 6 ELBO: -5.2394495
Iteration: 7 ELBO: -5.2338495
Iteration: 8 ELBO: -5.2302885
Iteration: 9 ELBO: -5.2278996
Iteration: 10 ELBO: -5.2259912
Iteration: 11 ELBO: -5.2249517
Iteration: 12 ELBO: -5.2242002
Iteration: 13 ELBO: -5.2236214
Iteration: 14 ELBO: -5.2231464
Iteration: 15 ELBO: -5.222804
Iteration: 16 ELBO: -5.222607
Iteration: 17 ELBO: -5.2224836
Iteration: 18 ELBO: -5.2223663
Iteration: 19 ELBO: -5.222261
Iteration: 20 ELBO: -5.222167
Iteration: 21 ELBO: -5.2220936
Iteration: 22 ELBO: -5.222032
Iteration: 23 ELBO: -5.2220173
Iteration: 24 ELBO: -5.2219954
Iteration: 25 ELBO: -5.2219872
Iteration: 26 ELBO: -5.221991
Iteration: 27 ELBO: -5.2219872
Iteration: 28 ELBO: -5.221985
Iteration: 29 ELBO: -5.2219744
Iteration: 30 ELBO: -5.2219625
Iteration: 31 ELBO: -5.2219877
Iteration: 32 ELBO: -5.222019

In [63]:
lam

<tf.Variable 'lambda_kv:0' shape=(10, 1000) dtype=float32, numpy=
array([[0.1       , 0.1       , 0.1       , ..., 0.1       , 0.10013366,
        0.10001303],
       [0.1       , 0.1       , 0.1       , ..., 0.1       , 0.10004404,
        0.10002226],
       [0.1       , 0.1       , 0.1       , ..., 0.1       , 7.044639  ,
        0.1       ],
       ...,
       [0.1       , 0.1       , 0.1       , ..., 0.1       , 0.1       ,
        0.1       ],
       [0.1       , 0.1       , 0.1       , ..., 0.1       , 0.1       ,
        0.10000306],
       [0.1       , 0.1       , 0.1       , ..., 0.1       , 0.10000444,
        3.0999494 ]], dtype=float32)>