In [1]:
import numpy as np

Indian Buffet Process Prior
====

In [2]:
# Indian Buffet Process Function

def sampleIBP(alpha, num_objects):
    
    # Initializing storage for results
    result = np.zeros([num_objects, 1000])
    # Draw from the prior for alpha
    t = np.random.poisson(alpha)
    # Filling in first row of result matrix
    result[0, 0:t] = np.ones([1, t])
    # Initializing K+
    K_plus = t
    

    for i in range(1, num_objects):
        for j in range(0, K_plus):
            p = np.array([np.log(np.sum(result[0:i,j])) - np.log(i), 
                          np.log(i - np.sum(result[0:i, j])) - np.log(i)])
            p = np.exp(p - max(p))

            if(np.random.uniform() < p[0]/np.sum(p)):
                result[i, j] = 1
            else:
                result[i, j] = 0
        t = np.random.poisson(alpha/i)
        x = K_plus + 1
        y = K_plus + t
        result[i, (x-1):y] = np.ones([1, t]) # NEED TO CHECK DIM
        K_plus = K_plus+t
    result = result[:, 0:K_plus]
    return list([result, K_plus])

In [3]:
sampleIBP(5, 5)

[array([[ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,
          0.],
        [ 1.,  1.,  1.,  0.,  0.,  1.,  0.,  1.,  0.,  1.,  1.,  1.,  0.,
          0.],
        [ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
          0.],
        [ 1.,  1.,  1.,  0.,  1.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,
          1.]]), 14]

Inverse Simplification
====

In [4]:
# Function for enhancing inverse process

def calcInverse(Z, M, i, k, val):
    M_i = M - (np.dot(np.dot(np.dot(M, Z[i,:].T), Z[i,:]), M)) / (np.dot(np.dot(Z[i,:], M), Z[i,:].T) - 1)
    Z[i, k] = val
    M = M_i - (np.dot(np.dot(np.dot(M_i, Z[i,:].T), Z[i,:]), M_i)) / (np.dot(np.dot(Z[i,:], M_i), Z[i,:].T) + 1)
    Inv = M
    return(Inv)

def calcInverse1(Z, M, i, k, val):
    # Calculating M_i
    part1 = M
    part2 = np.dot(M, Z[i,:].T)
    part3 = np.dot(part2, Z[i,:])
    part4 = np.dot(part3, M)
    part5 = np.dot(Z[i,:], M)
    part6 = np.dot(part5, Z[i,:].T) - 1

    M_i = part1 - part4 / part6

    Z[i, k] = val

    #Calculating M
    part1 = M_i
    part2 = np.dot(M_i, Z[i,:].T)
    part3 = np.dot(part2, Z[i,:])
    part4 = np.dot(part3, M_i)
    part5 = np.dot(Z[i,:], M_i)
    part6 = np.dot(part5, Z[i,:].T) + 1

    M = part1 - part4 / part6

    Inv = M
    return(Inv)

In [None]:
calcInverse(Z[:, 0:K_plus], M, i, k, 1)
print("K_plus = ", K_plus)
print("Z[:, 0:K_plus] = ", Z[:, 0:K_plus])
print("M = ", M)

Likelihood Function 
====

In [26]:
## Define the likelihood function ##
# The function returns the log likelihood
def likelihood(X, Z, M, sigma_A, sigma_X, K_plus, num_objects, object_dim):
    part1 = - (num_objects*(object_dim/2)*np.log(2*np.pi))
    part2 = - (num_objects-K_plus)* object_dim *np.log(sigma_X) 
    part3 = - (object_dim*K_plus)*np.log(sigma_A) 
    part4 = - (object_dim/2)* np.log(np.dot(np.transpose(Z),Z) + (sigma_X/sigma_A)**2 * np.identity(K_plus))
    part5 = (-1/(2*sigma_X**2)) * np.trace(np.dot(np.dot(np.transpose(X),(np.identity(num_objects) - np.dot(np.dot(Z,M),np.transpose(Z)))),X))
    total = part1+part2+part3+part4+part5
    return(total)

def likelihood1(X, Z, M, sigma_A, sigma_X, K_plus, num_objects, object_dim):
    log_ll = -1 * num_objects * object_dim * .5 * np.log(2*np.pi) - 1 * (num_objects - K_plus) * object_dim * np.log(sigma_X) - K_plus * object_dim * np.log(sigma_A) - object_dim * .5 * np.log(np.linalg.det((np.dot(Z.T, Z) + (sigma_X**2/sigma_A**2)*np.eye(K_plus)))) + (-1/(2*sigma_X**2)) * np.trace(np.dot(np.dot(X.T, np.eye(num_objects) - np.dot(Z, np.dot(M, Z.T))), X))
    return(log_ll)

In [None]:
O = np.array([[2,2], [1,1]])
np.linalg.det(O)

Data Generation
====

In [6]:
from scipy.stats import multivariate_normal as mvtnorm
from scipy.stats import bernoulli
import numpy as np

### Data Simulation ###

## Features/Latent Variables #
#Each row of the weight matrix "W" specifies one base images(feature/latent variables)"

W = np.array([[0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0]])


#Each image in our simulated data set is the superposition of four base images#
# Number of images/ data points
num_objects=100

#Dimension of image (6x6)
object_dim  = 6*6

#Covariance matrix for images/ white noise
sigma_x_orig = 0.5
I = sigma_x_orig * np.identity(object_dim)

#z_i - binary feature matrix (1 x 4) - each entry set to 1 with probability 0.5 and 0 otherwise#
#x is data variable - each row correspondes to a superimposed built from a random combination of latent features#
#with white noise added - x is built with multivariate gaussian#
x = np.zeros((100,36))
z = np.zeros((100,4))

for i in range(0,num_objects):
    z[i,:] = np.array([bernoulli.rvs(p=0.5, size=4)])
    x[i,:] = mvtnorm.rvs(np.dot(z[i,:],W), I)

The Sampler
====

In [77]:
# GENERAL FUNCTION SAMPLER

# Harmonic number or N
HN = 0
for i in range(0, num_objects):
    HN = HN + 1/(i+1)

E = 1000
BURN_IN = 0
SAMPLE_SIZE = 1000

# Initializing values for use in our chain
sigma_A = 1
sigma_X = 1
# Poisson rate
alpha = 1
# Prespecified maximum number of latent features
K_inf = 10
# Indian Buffet Process Prior
Z, K_plus = sampleIBP(alpha, num_objects)
# Initialization of our chain for Z
chain_Z = np.zeros([SAMPLE_SIZE, num_objects, K_inf])
# Initialization of our chain for K
chain_K = np.zeros([SAMPLE_SIZE, 1])
# Initialization of our chain for sigma_X
chain_sigma_X = np.zeros([SAMPLE_SIZE, 1])
# Initialization of our chain for sigma_A
chain_sigma_A = np.zeros([SAMPLE_SIZE, 1])
# Initialization of our chain for alpha
chain_alpha = np.zeros([SAMPLE_SIZE, 1])

# Initializing storage for post burn-in samples
s_counter = 0
for e in range(0, E):
    if(e > BURN_IN):
        s_counter = s_counter + 1
        chain_Z[s_counter, :, 0:K_plus] = Z[:, 0:K_plus]
        chain_K[s_counter] = K_plus
        chain_sigma_X[s_counter] = sigma_X
        chain_sigma_A[s_counter] = sigma_A
        chain_alpha[s_counter] = alpha
    #print("At iteration", e, ": K_plus is", K_plus, ", alpha is", alpha) 

    for i in range(0, num_objects):
        # M matrix will be handy for future computations
        # SOMETIMES SINGULAR, NEED TO FIX
        M = np.linalg.inv(np.dot(Z[:, 0:K_plus].T, Z[:, 0:K_plus]) + (sigma_X**2/sigma_A**2) * np.eye(K_plus))
        for k in range(0, K_plus):
            # Checking to make sure that k < K_plus
            if k >= K_plus:
                break
            if Z[i, k] > 0:
                # Take care of singularities
                if np.sum(Z[:, k]) - Z[i, k] <= 0:
                    Z[i, k] = 0
                    Z[:, k:(K_plus - 1)] = Z[:, (k+1):K_plus]
                    K_plus = K_plus - 1
                    M = np.linalg.inv(np.dot(Z[:, 0:K_plus].T, Z[:, 0:K_plus]) + (sigma_X**2/sigma_A**2) * np.eye(K_plus))
                    continue
             
            # This is where he has his calcInverse functions to 
            # speed up inverse calculations. I just use inverse
            # of Z matrix below
            M1 = calcInverse(Z[:, 0:K_plus], M, i, k, 1)
            M2 = calcInverse(Z[:, 0:K_plus], M, i, k, 0)

            # Compute conditional distributions for the current cell in Z
            P = []
            Z[i, k] = 1
            P.append(likelihood1(x, Z[:, 0:K_plus], M1, sigma_A, sigma_X, K_plus, num_objects, object_dim) + np.log(sum(Z[:, k]) - Z[i, k]) - np.log(num_objects))

            Z[i, k] = 0
            P.append(likelihood1(x, Z[:, 0:K_plus], M2, sigma_A, sigma_X, K_plus, num_objects, object_dim) + np.log(num_objects - sum(Z[:,k])) - np.log(num_objects))
            
            P = np.exp(P - np.max(P))

            # Sample from the conditional
            if np.random.uniform() < P[0] / (P[0] + P[1]):
                Z[i, k] = 1
                M = M1
            else:
                Z[i, k] = 0
                M = M2
        
        # Sample the number of new dishes for the current object
        trun = np.zeros([1, 5])
        alpha_N = alpha / num_objects


################### ERROR IN DIMENSIONALITY RIGHT HERE ###########################        
        for k_i in range(0, 5):
            Z[i, K_plus:(K_plus+k_i)] = 1
            M = np.linalg.inv(np.dot(Z[:, 0:(K_plus + k_i)].T, Z[:, 0:(K_plus+k_i)]) + (sigma_X**2/sigma_A**2) * np.eye(K_plus + k_i)) # MINUS ONE TO FIX DIMENSIONALITY MAY BE INCORRECT
        

ValueError: operands could not be broadcast together with shapes (4,4) (5,5) 

In [80]:
#np.linalg.inv(np.dot(Z[:, 0:(K_plus + k_i)].T, Z[:, 0:(K_plus+k_i)]) + (sigma_X**2/sigma_A**2) * np.eye(K_plus + k_i))
#np.dot(Z[:, 0:(K_plus + k_i)].T, Z[:, 0:(K_plus+k_i)]) + (sigma_X**2/sigma_A**2) * np.eye(K_plus + k_i-1)
k_i
i
Z[i, K_plus:(K_plus+k_i)] = 1
Z[i, K_plus:(K_plus+k_i)]
K_plus
Z[i, :]
Z[:, 0:(K_plus + k_i)]
k_i 
Z.shape
K_plus

4

In [54]:
for k_i in range(0, 5):
            Z[i, K_plus:(K_plus+k_i)] = 1
            M = np.linalg.inv(np.dot(Z[:, 0:(K_plus + k_i)].T, Z[:, 0:(K_plus+k_i)]) + (sigma_X**2/sigma_A**2) * np.eye(K_plus + k_i))

ValueError: operands could not be broadcast together with shapes (4,4) (5,5) 