## Implementation of Infinite Latent Feature Models and the Indian Buffet Process

In [1]:
import numpy as np
import scipy as sp
import math
import matplotlib.pyplot as plt
from __future__ import division
plt.style.use('ggplot')
import Image
import matplotlib.cm as cm
%matplotlib inline
%precision 4
import time

###### Simulate data

In [2]:
np.random.seed(1)

N = 100 #number of objects
K = 4 #true number of features
D = 36 # dimension of feature


sigmaX0 = .5;
A = np.array((0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
             0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, \
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0)).reshape(4, D)


I = (sigmaX0)*np.identity(D)
Z0 = np.zeros((N, K))
X = np.zeros((N, D))
for i in range(N):
    Z0[i,:] = (np.random.uniform(0,1,K) > .5).astype(int)
    while (np.sum(Z0[i,:]) == 0):
        Z0[i,:] = (np.random.uniform(0,1,K) > .5).astype(int)
    #X[i,:] = np.random.multivariate_normal(Z0[i,:].dot(A), I)
    #X(i,:) = randn(1, object_dim)*I+Z_orig(i,:)*A;
    X[i,:] = np.random.normal(0,1, (1,D)).dot(I)+Z0[i,:].dot(A)

###### Set initial numbers, dimensions and features

###### Sample prior

In [3]:
np.random.seed(1)
def sampleIBP(alpha, N):
    result = np.zeros((N, 1000))
    t = np.random.poisson(alpha)
    if t>0:
        result[0,0:t] = np.ones(t)
    Kplus = t
    for i in range(1,N):
        for j in range(Kplus):
            p = np.sum(result[0:i,j])/(i+1)
            if np.random.uniform(0,1) < p:
                result[i,j] = 1
        t = np.random.poisson(alpha/(i+1))
        if t>0:
            result[i,Kplus:Kplus+t] = np.ones(t)
            Kplus = Kplus+t
    result = result[:,0:Kplus]
    return np.array((result, Kplus))


In [10]:
def calcInverse(Z,M,i,k,val):
    #Z[i,k] = val
    #M_i = M - M.dot(Z[i,:].T.dot(Z[i,:].dot(M)))/(Z[i,:].dot(M.dot(Z[i,:].T))-1)
    M_i = M-np.dot(np.dot(np.dot(M,Zn[i,:].T),Zn[i,:]),M)/(np.dot(np.dot(Zn[i,:],M),Zn[i,:].T)-1)
    #Z[i,k] = val
    Zn[i,k] = val
    #M = M_i - M_i.dot(Z[i,:].T.dot(Z[i,:].dot(M_i)))/(Z[i,:].dot(M_i.dot(Z[i,:].T))+1)
    M = M_i-np.dot(np.dot(np.dot(M_i,Zn[i,:].T),Zn[i,:]),M_i)/(np.dot(np.dot(Zn[i,:],M_i),Zn[i,:].T)+1)
    Inv = M
    return Inv

##### Likelihood function

In [5]:
# define a log likelihood function 
def ll_m(X, Z, sigmaX, sigmaA, K, D, N, M):
    M1 = Z.T.dot(Z)+(sigmaX**2/sigmaA**2)*np.identity(K)
    return (-1)*np.log(2*np.pi)*N*D*.5 - np.log(sigmaX)*(N-K)*D - np.log(sigmaA)*K*D - .5*D*np.log(np.linalg.det(M1)) \
        -.5/(sigmaX**2)*np.trace( (X.T.dot( np.identity(N)-Z.dot(M.dot(Z.T)) )).dot(X) )

In [7]:
HN = 0.
for i in range(1,N+1):
    HN += 1./i
    
#Kplus = 4 #current number of features with at least one object
niter = 400
sigmaX = 1.
sigmaA = 1.
alpha = 1.
maxNew = 4
BURN_IN=200

In [8]:

SAMPLE_SIZE=niter-BURN_IN

K_inf=50

chain_Z=np.zeros((SAMPLE_SIZE,N,K_inf))
chain_K=np.zeros((SAMPLE_SIZE,1))
chain_sigma_X=np.zeros((SAMPLE_SIZE,1))
chain_sigma_A=np.zeros((SAMPLE_SIZE,1))
chain_alpha=np.zeros((SAMPLE_SIZE,1))

In [11]:

t0 = time.time()
np.random.seed(1)
Z, Kplus = sampleIBP(alpha, N)
s_counter=0

#increase the size of Z upto Z_inf
addCols = np.zeros((N,(K_inf-Kplus)))
Z= np.hstack((Z,addCols))


for j in range(niter):
    print("iteration:",j ,  "Kplus:",Kplus,  "shape of Z", Z.shape, "alpha:", alpha, "sigmaX", sigmaX)
    #update z
#     if((j+1)>BURN_IN):
#         chain_Z[s_counter,:,0:Kplus]=Z
#         chain_K[s_counter]=Kplus
#         chain_sigma_X[s_counter]=sigmaX
#         chain_sigma_A[s_counter]=sigmaA
#         chain_alpha[s_counter]=alpha
#         s_counter=s_counter+1
    
    for i in range(1):
        M = np.linalg.inv(Z[:,0:Kplus].T.dot(Z[:,0:Kplus])+(sigmaX**2/sigmaA**2)*np.identity(Kplus))
        for k in range(1):
            #print k
            if k>=Kplus:
                break     
            #Removing the singular features, i.e. the ones that have 1 for the current object only.
            if Z[i,k] >= 0:
                if (np.sum(Z[:,k])- Z[i,k]) <=0:
                    #Z[i,k] = 0
                    Z[:,k:(Kplus-1)] = Z[:,(k+1):Kplus] #shift everything one column to the left
                    Kplus = Kplus-1
                    #Z = Z[:,0:Kplus] # remove the last column as it is redundent
                    M = np.linalg.inv(Z[:,0:Kplus].T.dot(Z[:,0:Kplus])+(sigmaX**2/sigmaA**2)*np.identity(Kplus))
                    continue #We're no longer looking at this feature, so move to another one               
        
            P = np.zeros(2)
            
            M0 = calcInverse(Z[:,0:Kplus],M,i,k,0)
            print "calcinverse \n", M0
            M1 = calcInverse(Z[:,0:Kplus],M,i,k,1)
            #set Z[i,k] = 0 and calculate posterior probability
            Z[i,k] = 0
            M0_0 = np.linalg.inv(Z[:,0:Kplus].T.dot(Z[:,0:Kplus])+(sigmaX**2/sigmaA**2)*np.identity(Kplus))
            print "normal \n",M0_0
            P[0] = ll_m(X, Z[:,0:Kplus] , sigmaX, sigmaA, Kplus, D, N, M0) + np.log(N-np.sum(Z[:,k])) - np.log(N)
        
            #set Z[i,k] = 1 and calculate posterior probability
            Z[i,k] = 1
            M1 = np.linalg.inv(Z[:,0:Kplus].T.dot(Z[:,0:Kplus])+(sigmaX**2/sigmaA**2)*np.identity(Kplus))
            P[1] = ll_m(X, Z[:,0:Kplus],sigmaX, sigmaA, Kplus, D, N, M1)  + np.log(np.sum(Z[:,k])- 1) - np.log(N)
        
            P = np.exp(P - max(P))

            U = np.random.uniform(0,1)
            if U<(P[0]/(np.sum(P))):
                Z[i,k] = 0
                M = M0
            else:
                Z[i,k] = 1  
                M = M1    
        #Sample number of new features
        prob = np.zeros(maxNew)
        alphaN = alpha/N
        for kNew in range(maxNew): # max new features is 3
            
            Z_temp = Z
#             if kNew>0:
#                 addCols = np.zeros((N,kNew))
#                 addCols[i,:] = 1
#                 Z_temp = np.hstack((Z_temp, addCols))
            Z_temp[:,Kplus:(Kplus+kNew)]=0
            Z_temp[i,Kplus:(Kplus+kNew)]=1
            pois = kNew*np.log(alphaN) - alphaN - np.log(math.factorial(kNew))
            M = np.linalg.inv(Z_temp[:,0:(Kplus+kNew)].T.dot(Z_temp[:,0:(Kplus+kNew)])+(sigmaX**2/sigmaA**2) \
                              *np.identity(Kplus+kNew))
            lik = ll_m(X = X, Z = Z_temp[:,0:(Kplus+kNew)], sigmaX = sigmaX, sigmaA = sigmaA, K=(Kplus+kNew), D= D, N= N, M=M)
            prob[kNew] = pois + lik

        #normalize prob
        Z[:,(Kplus+1):(Kplus+maxNew)] = 0
        prob = np.exp(prob - max(prob))
        prob = prob/sum(prob)
#         kNew = np.where(prob==max(prob))[0]
        U = np.random.uniform(0,1,1)
        p = 0
        kNew=0
        for new in range(maxNew):
            p = p+prob[new]
            if U<p:
                kNew = new
                break
        Z[i,Kplus:(Kplus+kNew)]=1
         
        #Add kNew new columns to Z and set the values at ith row to 1 for all of them
#         if kNew>0:
#             addCols = np.zeros((N,kNew))
#             addCols[i,:] = 1
#             Z = np.hstack((Z, addCols))
        Kplus = Kplus + kNew
    
    M = np.linalg.inv(Z[:,0:Kplus].T.dot(Z[:,0:Kplus])+(sigmaX**2/sigmaA**2)*np.identity(Kplus))
    
    llCurrent = ll_m(X, Z[:,0:Kplus], sigmaX, sigmaA, Kplus, D, N, M )
    #update sigmaX
    if np.random.uniform(0,1) < .5:
        sigmaX_new = sigmaX - np.random.uniform(0,1)/20
    else:
        sigmaX_new = sigmaX + np.random.uniform(0,1)/20
    
    M = np.linalg.inv(Z[:,0:Kplus].T.dot(Z[:,0:Kplus])+(sigmaX_new**2/sigmaA**2)*np.identity(Kplus))
    llNew = ll_m(X, Z[:,0:Kplus], sigmaX_new, sigmaA, Kplus, D, N, M)

    arX = np.exp(min(0,llNew-llCurrent))
    U = np.random.uniform(0,1)
    if U < arX:
        sigmaX = sigmaX_new
        
        
    #update sigma_A
    #epsA = np.random.uniform(0,1)
    if np.random.uniform(0,1) < .5:
        sigmaA_new = sigmaA - np.random.uniform(0,1)/20
    else:
        sigmaA_new = sigmaA + np.random.uniform(0,1)/20
    M = np.linalg.inv(Z[:,0:Kplus].T.dot(Z[:,0:Kplus])+(sigmaX**2/sigmaA_new**2)*np.identity(Kplus))
    llNew = ll_m(X, Z[:,0:Kplus], sigmaX, sigmaA_new, Kplus, D, N, M)

    arA = np.exp(min(0,llNew-llCurrent))

    U = np.random.uniform(0,1)
    if U < arA:
        sigmaA = sigmaA_new
        
    alpha = np.random.gamma(1+Kplus, 1/(1+HN))
t1 = time.time()
timeElapsed = t1-t0

('iteration:', 0, 'Kplus:', 4, 'shape of Z', (100, 50), 'alpha:', 1.0, 'sigmaX', 1.0)
calcinverse 
[[ 0.0146 -0.0109 -0.0073 -0.0122]
 [-0.0109  0.2618  0.0055  0.0091]
 [-0.0073  0.0055  0.2573  0.0061]
 [-0.0122  0.0091  0.0061  0.1792]]
normal 
[[ 0.0146 -0.0109 -0.0073 -0.0122]
 [-0.0109  0.2582  0.0055  0.0091]
 [-0.0073  0.0055  0.2536  0.0061]
 [-0.0122  0.0091  0.0061  0.1768]]
('iteration:', 1, 'Kplus:', 4, 'shape of Z', (100, 50), 'alpha:', 0.6348743285308224, 'sigmaX', 1.0)
calcinverse 
[[ 0.0146 -0.0109 -0.0073 -0.0122]
 [-0.0109  0.2582  0.0055  0.0091]
 [-0.0073  0.0055  0.2536  0.0061]
 [-0.0122  0.0091  0.0061  0.1768]]
normal 
[[ 0.0146 -0.0109 -0.0073 -0.0122]
 [-0.0109  0.2582  0.0055  0.0091]
 [-0.0073  0.0055  0.2536  0.0061]
 [-0.0122  0.0091  0.0061  0.1768]]
('iteration:', 2, 'Kplus:', 4, 'shape of Z', (100, 50), 'alpha:', 0.8715855405289847, 'sigmaX', 1.0)
calcinverse 
[[ 0.0145 -0.0106 -0.0071 -0.0119]
 [-0.0106  0.2554  0.0052  0.0087]
 [-0.0071  0.0052  0.25

In [None]:
plt.plot(chain_sigma_A)
np.mean(chain_sigma_A)

In [None]:
from numbapro import cuda, vectorize, guvectorize, check_cuda
from numbapro import void, uint8 , uint32, uint64, int32, int64, float32, float64, f8

In [34]:
A = np.array([1,2,3,4,5,3,2])
np.where(A==max(A))[0]

array([4])

In [76]:
np.identity(5)[:,0:5]

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

### Unit testing

To check if the code is working properly some of the unit testings I've come up with so far are:
* Probabilities calculated for the presence of feature have to be between 0 and 1.
* 