In [9]:
%load_ext Cython

In [11]:
%%cython
from __future__ import division
#cimport cython
import numpy as np
cimport numpy as np
ctypedef np.float64_t dtype_t
ctypedef np.uint32_t uitype_t

# @cython.boundscheck(False)
# @cython.wraparound(False)

def sampling ( np.ndarray[uitype_t, ndim=2] Ndk,
               np.ndarray[dtype_t, ndim=2] Nkw_mean,
	       np.ndarray[dtype_t, ndim=2] Ndk_mean,
               np.ndarray[dtype_t, ndim=2] expElogbeta,
               np.ndarray[dtype_t, ndim=1] uni_rvs,
	       list z,
               list wordtks,
	       list lengths,
	       double alpha,
	       double update_unit,
               int num_sim,
               int burn_in ):

    #if not phi.flags.f_contiguous: phi = phi.copy('F')
    #if not Adk.flags.c_contiguous: phi = phi.copy('C')
    ##if not Bkw.flags.f_contiguous: phi = phi.copy('F')

    cdef Py_ssize_t D = Ndk.shape[0]
    cdef Py_ssize_t K = Ndk.shape[1]
    cdef Py_ssize_t W = Nkw_mean.shape[1]
    cdef Py_ssize_t d, w, k, sim, zInit, zOld, zNew
    cdef Py_ssize_t rc_start = 0, rc_mid, rc_stop = K
    cdef double prob_sum, uni_rv
    cdef Py_ssize_t uni_idx = 0, tks_idx = 0
    cdef np.ndarray[dtype_t, ndim=1] cumprobs = np.linspace(0,1,K+1)[0:K]
    cdef np.ndarray[uitype_t, ndim=1] zd

    # Make sure the counts are initialised to zero
    # Ndk.fill(0)
    # Nkw_mean.fill(0)
    # Initialise the z_id for each document in the batch
    for d in range(D):
        zd = np.zeros(lengths[d], dtype=np.uint32)
        tks_idx = 0
        for w in wordtks[d]:
            uni_rv = uni_rvs[uni_idx] #np.random.rand() * prob_sum
            uni_idx += 1
            rc_start = 0
            rc_stop  = K
            while rc_start < rc_stop - 1:
                rc_mid = (rc_start + rc_stop) // 2
                if cumprobs[rc_mid] <= uni_rv:
                    rc_start = rc_mid
                else:
                    rc_stop = rc_mid
            #while uni_rv > cumprobs[rc_start]:
            #    rc_start += 1
            zInit    = rc_start
            Ndk[d,zInit] += 1
            zd[tks_idx] = zInit
            tks_idx += 1
        z[d] = zd

    # Draw samples from the posterior on z_ids using Gibbs sampling

    # burn-in phase
    for sim in range(burn_in):
        for d in range(D):
            tks_idx = 0
            for w in wordtks[d]:
                zOld = z[d][tks_idx]
                Ndk[d,zOld] -= 1
                prob_sum = 0
                # Faster than using numpy elt product
                for k in range(K):
                    cumprobs[k] = prob_sum
                    prob_sum +=  (alpha + Ndk[d,k]) * expElogbeta[k,w]
                uni_rv = prob_sum * uni_rvs[uni_idx]
                uni_idx += 1
		# inline randcat function call
                rc_start = 0
                rc_stop  = K
                while rc_start < rc_stop - 1:
                    rc_mid = (rc_start + rc_stop) // 2
                    if cumprobs[rc_mid] <= uni_rv:
                        rc_start = rc_mid
                    else:
                        rc_stop = rc_mid
                zNew = rc_start
                z[d][tks_idx] = zNew
                tks_idx += 1
                Ndk[d,zNew] += 1

    # sampling phase
    for sim in range(num_sim):
        for d in range(D):
            tks_idx = 0
            for w in wordtks[d]:
                zOld = z[d][tks_idx]
                Ndk[d,zOld] -= 1
                prob_sum = 0
                # Faster than using numpy elt product
                for k in range(K):
                    cumprobs[k] = prob_sum
                    prob_sum +=  (alpha + Ndk[d,k]) * expElogbeta[k,w]
                uni_rv = prob_sum * uni_rvs[uni_idx]
                uni_idx += 1
		# inline randcat function call
                rc_start = 0
                rc_stop  = K
                while rc_start < rc_stop - 1:
                    rc_mid = (rc_start + rc_stop) // 2
                    if cumprobs[rc_mid] <= uni_rv:
                        rc_start = rc_mid
                    else:
                        rc_stop = rc_mid
                zNew = rc_start
                z[d][tks_idx] = zNew
                tks_idx += 1
                Ndk[d,zNew] += 1
                Ndk_mean[d,zNew] += update_unit
                Nkw_mean[zNew,w] += update_unit


In [14]:
import numpy as n
from scipy.special import psi
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(psi(alpha) - psi(n.sum(alpha)))
    return(psi(alpha) - psi(n.sum(alpha, 1))[:, n.newaxis])

In [2]:
import numpy as n
from scipy.special import psi
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    """
#     if (len(alpha.shape) == 1):
#         return(psi(alpha) - psi(n.sum(alpha)))
    return(alpha**2 - (n.sum(alpha, 1)**2)[:, n.newaxis])

In [17]:
import numpy as n
from scipy.special import psi
from math import log
def sf(a):
    return a**1.5 + 1.5
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    """
#     if (len(alpha.shape) == 1):
#         return(psi(alpha) - psi(n.sum(alpha)))
    return (sf(alpha) - (sf(n.sum(alpha, 1))[:, n.newaxis]))

In [18]:
bda = 1*n.random.gamma(100., 1./100., (5 , 6))

In [19]:
print bda


[[ 1.09853738  0.96499392  1.10248365  1.17882871  0.98105439  0.91021905]
 [ 0.93599687  0.90140288  1.09070122  0.98028032  0.91733596  1.03132557]
 [ 0.96619847  1.13277934  0.99980484  1.14526639  0.97431978  0.94451184]
 [ 0.96960617  0.8235949   0.98463129  0.99024734  1.22386446  1.01755706]
 [ 0.96794406  1.06609893  0.97368769  0.81830179  0.95421755  1.05503989]]


In [20]:
debda = dirichlet_expectation(bda)
print debda

[[-14.42157856 -14.62501491 -14.4153688  -14.29306799 -14.60125143
  -14.70456992]
 [-13.26927325 -13.31900942 -13.03572978 -13.20425472 -13.29621862
  -13.12746706]
 [-14.34971499 -14.09380268 -14.29973632 -14.0738125  -14.33771555
  -14.38151033]
 [-13.7771045  -13.98443204 -13.75482622 -13.74645522 -13.37792019
  -13.70541111]
 [-13.14362397 -12.9951583  -13.13513517 -13.3556914  -13.16380904
  -13.01224185]]


In [12]:
%load_ext Cython


In [13]:
%%cython
import numpy as n
cimport numpy as n
ctypedef n.float64_t dtype_t
from scipy.special import psi
def dirichlet_expectation_1(n.ndarray[dtype_t, ndim=2] alpha):
#     if len(alpha.shape) == 1:
#         return psi(alpha) - psi(n.sum(alpha))
    return (psi(alpha) - psi(n.sum(alpha, 1))[:, n.newaxis]

In [28]:
%%cython
from __future__ import division
import sys
sys.path.insert(0,'/home/cuonghn/workspace/python/ipython/utils')
from ctypes import cdll
cimport float
lib = cdll.LoadLibrary('/home/cuonghn/workspace/python/ipython/utils/libdigamma.so')
cdef float a = 0.76
print type(a)
b = lib.digamma(a)
print b


Error compiling Cython file:
------------------------------------------------------------
...
from __future__ import division
import sys
sys.path.insert(0,'/home/cuonghn/workspace/python/ipython/utils')
from ctypes import cdll
cimport float
       ^
------------------------------------------------------------

/home/cuonghn/.cache/ipython/cython/_cython_magic_caaa11b51b4866791e8b0863ae6ca623.pyx:5:8: 'float.pxd' not found
