Latent Dirichlet Allocation - Variational Inference
====

Based on the paper "Latent Dirichlet Allocation" by David M. Blei, Andrew Y. Ng, and Michael I. Jordan.

In [3]:
import numpy as np
import numpy.linalg as la
from scipy.special import digamma

## Parameters

document:    $m = 1,...,M$

topic:       $z = 1,...,k$

word:        $w = 1,...,N_m$

vocabulary : $v = 1,...,V$

$\alpha: 1 \times k$ vector of Dirichlet parameters governing each document's topic distribution

$\beta: k \times V$ matrix of word probabilities for each topic

In [6]:
# Seed NumPy's legacy global RNG so the np.random draws in later cells are reproducible.
np.random.seed(seed=1337)

In [None]:
### Test data and pre-processing

In [None]:
from string import punctuation

doc1 = "This is a sample document for my computational statistics final project. The goal of the project is to make latent dirichlet allocation work and improve it in some way or another."
doc2 = "Graduate school is a lot of work. It can be hard to balance time between all my committments, but somehow it will work out in the end."
doc3 = "I like butterflies. Their wings look so neat. The way they somehow hang in a balance as they drink nectar is cool. I wonder what their population statistics look like."

docs = [doc1,doc2,doc3]

# Translation table that deletes every ASCII punctuation character (built once,
# not once per document).
strip_punctuation = dict.fromkeys(map(ord, punctuation))

# Lower-case, strip punctuation and split on whitespace: one token list per document.
# (Previously the loop rebound a single `words` variable, so only the last
# document's tokens were kept.)
tokens = [doc.lower().translate(strip_punctuation).split() for doc in docs]

# Sorted vocabulary over all documents and the token -> integer id lookup.
vocab = sorted({token for doc_tokens in tokens for token in doc_tokens})
word_to_id = {token: idx for idx, token in enumerate(vocab)}

# words[m][n] is the vocabulary id of the n-th word of document m; integer ids
# are what optVarParams needs to index the columns of beta.
words = [[word_to_id[token] for token in doc_tokens] for doc_tokens in tokens]
    

In [7]:
# Problem dimensions: M documents, k topics, V vocabulary entries.
M, k, V = 3, 10, 100

# Word count of each document, drawn uniformly from the integers 0..49.
N = np.random.randint(50,size=M)

print('N: {0}'.format(N))

N: [23 28 40]


### Initialize parameters $\alpha, \beta, \phi$ and $\gamma$

In [8]:
# alpha: a single Dirichlet draw over the k topics, shape (1, k).
alpha = np.random.dirichlet(np.ones(k),1)
# beta: one word distribution per topic, shape (k, V); each row sums to 1.
# (The Dirichlet must be over the V vocabulary entries and drawn k times;
# the reverse order yields a (V, k) array that beta[i, word] cannot index.)
beta = np.random.dirichlet(np.ones(V),k)

# phi: one (N[m], k) matrix per document, every entry 1/k. Kept as a plain list
# because the documents have different lengths and NumPy refuses to build an
# array from ragged sub-arrays.
phi = [np.ones((N[m], k)) / k for m in range(M)]
# gamma: shape (M, k), gamma[m, i] = alpha[i] + N[m]/k.
gamma = np.tile(alpha,(M,1)) + (N / k)[:, np.newaxis]

### Optimize variational parameters $\phi$ and $\gamma$

In [None]:
# TODO: Split phi and gamma optimization apart for parallelization purposes
def optVarParams(alpha,beta,phi,gamma,words):
    """Run one pass of the variational updates for phi and gamma.

    For every word n of document m the update is
        phi[m][n, i]  proportional to  beta[i, words[m][n]] * exp(digamma(gamma[m, i]) - digamma(sum_j gamma[m, j]))
    with each row of phi[m] normalised to sum to 1, followed by
        gamma[m, i] = alpha[i] + sum_n phi[m][n, i].

    All sizes are taken from the arguments rather than from notebook globals:
    the number of topics is beta.shape[0], the number of documents is len(phi),
    and the number of words of document m is phi[m].shape[0].

    Parameters
    ----------
    alpha : array of shape (1, k) or (k,)
    beta : array of shape (k, V); beta[i, v] is the weight of word v in topic i
    phi : sequence of arrays, phi[m] of shape (N_m, k); overwritten in place
    gamma : array of shape (M, k); read only, a new array is returned
    words : sequence of integer id sequences; words[m] must hold at least
        phi[m].shape[0] vocabulary ids

    Returns
    -------
    (phi, gamma) : the same phi container with updated rows, and a new
        (M, k) gamma array.
    """
    n_topics = beta.shape[0]
    n_docs = len(phi)

    # Column sums of each updated phi[m], collected for the gamma update.
    topic_mass = np.zeros((n_docs, n_topics))

    # Optimize phi
    for m in range(n_docs):
        n_words = phi[m].shape[0]
        word_ids = np.asarray(words[m][:n_words], dtype=int)

        # Per-topic factor from the current gamma[m]; identical for every word
        # of the document, so it is computed once instead of once per (n, i).
        topic_weight = np.exp(digamma(gamma[m]) - digamma(np.sum(gamma[m])))

        # Row n holds beta[:, word_ids[n]] * topic_weight, shape (N_m, k).
        unnormalized = beta[:, word_ids].T * topic_weight

        # Normalise each word's row to sum to 1. A row that is entirely zero
        # is left as zeros instead of producing NaN.
        row_totals = unnormalized.sum(axis=1, keepdims=True)
        row_totals[row_totals == 0] = 1.0
        phi[m][...] = unnormalized / row_totals

        topic_mass[m] = phi[m].sum(axis=0)

    # Optimize gamma (both operands are (M, k); no transpose is needed)
    gamma = np.tile(alpha,(n_docs,1)) + topic_mass

    return phi,gamma

### Estimate model parameters $\alpha$ and $\beta$

In [None]:
def estModParams(alpha,beta,phi,gamma,words):
    """Re-estimate the topic-word matrix beta from the variational parameters.

    The update is
        beta[i, j]  proportional to  sum_m sum_n phi[m][n, i] * [words[m][n] == j]
    with every topic row normalised to sum to 1. A topic that receives no mass
    at all keeps its previous row, so no NaN is introduced.

    alpha is returned unchanged; no alpha update is implemented here, and
    gamma is accepted but not used.

    Parameters
    ----------
    alpha : array, passed through untouched
    beta : array of shape (k, V); overwritten in place
    phi : sequence of arrays, phi[m] of shape (N_m, k)
    gamma : unused
    words : sequence of integer id sequences; words[m] must hold at least
        phi[m].shape[0] vocabulary ids in the range 0..V-1

    Returns
    -------
    (alpha, beta)
    """
    n_topics, vocab_size = beta.shape

    # Optimize beta
    # expected_counts[j, i] accumulates phi[m][n, i] over every occurrence of word j.
    expected_counts = np.zeros((vocab_size, n_topics))
    for phi_m, words_m in zip(phi, words):
        word_ids = np.asarray(words_m[:phi_m.shape[0]], dtype=int)
        # np.add.at is unbuffered, so a word id repeated within a document is
        # accumulated once per occurrence (plain fancy-index += would not).
        np.add.at(expected_counts, word_ids, phi_m)

    expected_counts = expected_counts.T
    topic_totals = expected_counts.sum(axis=1, keepdims=True)
    has_mass = topic_totals[:, 0] > 0
    beta[has_mass] = expected_counts[has_mass] / topic_totals[has_mass]

    return alpha,beta

### Expectation Maximization

In [None]:
# Threshold constant, 10**(-3) = 0.001. It is not referenced by any code yet;
# presumably the tolerance for the EM stopping test sketched below (TODO confirm).
convergence = 10**(-3)

# The triple-quoted string below is a bare expression used as a block comment:
# it sketches the intended EM loop and is never executed.
# NOTE(review): the sketched calls omit the `words` argument that both
# optVarParams and estModParams take as their fifth parameter.
'''
Pseudocode

while(!converged):
    phi,gamma  = optVarParams(alpha,beta,phi,gamma)
    alpha,beta = estModParams(alpha,beta,phi,gamma)
    if converged(alpha,beta):
        break
'''        

### Tests 
Testing out syntax and array dimensions

In [11]:
# Sum each document's phi over its words, stack the per-document (k,) vectors,
# then sum over documents: the result is one total per topic.
np.sum(np.array([np.sum(doc_phi, axis=0) for doc_phi in phi]), axis=0)

array([ 9.1,  9.1,  9.1,  9.1,  9.1,  9.1,  9.1,  9.1,  9.1,  9.1])