In [1]:
#Uploading required packages
import numpy as np
from scipy.stats import gamma
from scipy.stats import uniform as unif
from scipy.stats import multivariate_normal as mvtnorm
from scipy.stats import bernoulli
from numba import jit
import matplotlib.pyplot as plt
%matplotlib inline

Indian Buffet Process Function
====

In [2]:
# Indian Buffet Process Function

def sampleIBP(alpha, num_objects):  
    # Initializing storage for results
    result = np.zeros([num_objects, 1000])
    # Draw from the prior for alpha
    t = np.random.poisson(alpha)
    # Filling in first row of result matrix
    result[0, 0:t] = np.ones(t) #changed form np.ones([1, t])
    # Initializing K+
    K_plus = t
    
    for i in range(1, num_objects):
        for j in range(0, K_plus):
            p = np.array([np.log(np.sum(result[0:i,j])) - np.log(i+1), 
                          np.log(i+1 - np.sum(result[0:i, j])) - np.log(i+1)])
            p = np.exp(p - max(p))

            if(np.random.uniform() < p[0]/np.sum(p)):
                result[i, j] = 1
            else:
                result[i, j] = 0
        t = np.random.poisson(alpha/(i+1))
        x = K_plus + 1
        y = K_plus + t
        result[i, (x-1):y] = np.ones(t) #changed form np.ones([1, t])
        K_plus = K_plus+t
    result = result[:, 0:K_plus]
    
    return list([result, K_plus])

Data Simulation
====

In [3]:
#Data Simulation

#Latent Features
W = np.array([[0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0]])


#Each image in our simulated data set is the superposition of four base images#
# Number of images/ data points
num_objects=100

#Dimension of image (6x6)
object_dim  = 6*6

#Covariance matrix for images/ white noise
sigma_x_orig = 0.5
I = sigma_x_orig * np.identity(object_dim)

#z_i - binary feature matrix (1 x 4) - each entry set to 1 with probability 0.5 and 0 otherwise#
#x is data variable - each row correspondes to a superimposed built from a random combination of latent features#
#with white noise added - x is built with multivariate gaussian#
image_data = np.zeros((100,36))
z_org = np.zeros((100,4))

for i in range(0,num_objects):
    z_org[i,:] = np.array([bernoulli.rvs(p=0.5, size=4)])
    image_data[i,:] = np.dot(z_org[i,:],W) + np.random.normal(0,1, (1,object_dim)).dot(I) 

Implementation
====

We will be using an infinite Gaussian binary latent feature model with an Indian Buffet Process prior to model the number of latent features present in our simulated image dataset. We follow the example as discussed in Griffiths and Ghahramani(2005).

We begin by defining a binary feature ownership matrix $\textbf{Z}$ that represents whether or not each feature is present given the observations $\textbf{X}$. Therefore each D-dimensional object $\textbf{x_i}$ has a Gaussian distribution:
$$x_i \sim Normal(z_i A, \Sigma_X)$$

Where $\textbf{A}$ is a $\textit{K x D}$ matrix of weights representing the $\textit{K}$ latent features. The noise introduced into our simulated images is represented by the covariance $\Sigma_X$. The prior on $\textbf A$ is matrix Gaussian as well with mean 0 and covariance $\Sigma_A$. We then integrate out $\textbf A$ to generate the data likelihood:

$$P(X|Z,\sigma_X, \sigma_A) = \frac{1}{(2 \pi)^{ND/2} (\sigma_X)^{(N-K)D}(\sigma_A)^{KD}(|Z^TZ+\frac{\sigma_X^2}{\sigma_A^2}I|)^{D/2}} exp\{-\frac{1}{2\sigma_X^2}tr(X^T(I-Z(Z^TZ+\frac{\sigma_X^2}{\sigma_A^2}I)^{-1}Z^T)X)\}$$

### Gibbs Sampling and Metropolis-Hastings Algorithm

We now implement both Gibbs Sampling and Metropolis-Hastings to perform inference on our image dataset.

#### Parameters of Interest

We have five parameters of interest in our model that we need to update throughout our MCMC process.

1.) Z: feature ownernship matrix

2.) $K_+$: number of new latent features

3.) $\alpha$ parameter for  $K_+$

4.) $\sigma_x$

5.) $\sigma_A$

We are able to find known full conditional distributions for Z, $K_+$, and $\alpha$, so we will update them using Gibbs Sampling. We will update $\sigma_X$ and $\sigma_A$ using random-walk Metropolis-Hastings.

#### Prior Distributions

We begin by setting a prior on on our parameter controlling $K_+$, $\alpha$:

$$\alpha \sim Gamma(1,1)$$

We then set a prior on our latent feature binary matrix Z using the Indian Buffet Process prior:

$$P(z_{ik} = 1 | \textbf{z}_{-i,k}) = \dfrac{n_{-i,k}}{N}$$

Finally, we set a Poisson prior on the number of latent features $K_+$:

$$K_+ \sim Poisson(\dfrac{\alpha}{N})$$

#### Full Conditional Distributions Used  for Gibbs Sampling

Now that we have defined our likelihood and selected our priors, we will now define our full conditional distributions for Z, $K_+$, and $\alpha$ to be used in Gibbs Sampling. Beginning with Z, we find the full conditional distribution to be:

$$P(z_{ik}|X,Z_{-(i,k),},\sigma_X,\sigma_A) \propto  P(X|Z,\sigma_X, \sigma_A) * P(z_{ik}=1|\textbf{z}_{-i,k})$$

To sample the number of new features $K_+$ for observation $i$, we use our data likelihood and our Poisson$(\dfrac{\alpha}{N})$ prior for $K_+$ and truncate this distribution for a range of values of $K_+$ up to 4 new features. We then use this to compute the probability distribution for $K_+$ and sample the number of new features from this distribution.

#### Metropolis-Hasting Updates

To update $\sigma_X$ and $\sigma_A$, we use random walk Metropolis-Hastings steps. For $\sigma_X$, we generate a random value from a Uniform(-.05, .05) distribution and add this value to our current value of $\sigma_X$ to get $\sigma_X^*$. We then accept our new value of $\sigma_X$ with probability:

$$p = min(1, \dfrac{P(X|Z, \sigma_X^*, \sigma_A}{P(X|Z, \sigma_X, \sigma_A})$$

To update $\sigma_A$, we follow the same proceedure as with $\sigma_X$, replacing $\sigma_X$ with $\sigma_A$.


Profiling & Optimization
====

### Functions Used within the Sampler

#### Likelihood Function

The likelihood function is used to compute $P(X|Z,\sigma_X, \sigma_A)$.

In [4]:
# Likelihood function used to compute P(X | Z, sigma_X, sigma_A)
def likelihood(X, Z, M, sigma_A, sigma_X, K_plus, num_objects, object_dim):
    part1 = (-1)*num_objects*(0.5*object_dim)*np.log(2*np.pi)
    part2 = (-1)*(num_objects-K_plus)* object_dim *np.log(sigma_X) 
    part3 = (-1)*object_dim*K_plus*np.log(sigma_A) 
    part4 = (-1)*(0.5*object_dim)* np.log(np.linalg.det((np.dot(Z.T, Z) + (sigma_X**2/sigma_A**2)*np.eye(K_plus)))) 
    part5 = (-1/(2*sigma_X**2)) * np.trace(np.dot(np.dot(X.T,(np.identity(num_objects) - np.dot(np.dot(Z,M),Z.T))),X))
    total = part1+part2+part3+part4+part5
    return(total)

#### Mcalc Function

The Mcalc function is used to compute $(Z^T Z + (\dfrac{\sigma_X^2}{\sigma_A^2}) * I)^{-1}$ which is used in several calculations throughout the sampler.

In [5]:
def Mcalc(Z, sigma_X, sigma_A, K_plus):
    M = np.linalg.inv(np.dot(Z.T, Z) + (sigma_X**2/sigma_A**2) * np.eye(K_plus))
    return(M)

#### Met_zval Function

The Met_zval function is used to update $z_{i,k}$ using Gibbs Sampling.

In [6]:
#This function samples new value of Z[i,k] using Gibbs Sampling
def Met_zval(data, Z, sigma_X, sigma_A, K_plus, num_objects, object_dim, i, k):    
    
    P=np.zeros(2)

    Z[i,k]=1
    #Compute posterior density of new_sample
    M = Mcalc(Z, sigma_X, sigma_A, K_plus)
    P[0] = likelihood(data, Z, M, sigma_A, sigma_X, K_plus, num_objects, object_dim) + np.log(np.sum(Z[:, k]) - Z[i,k]) - np.log(num_objects)

    #Set new_sample to 0
    Z[i,k]=0
    #Computer posterior density of new_sample
    M = Mcalc(Z, sigma_X, sigma_A, K_plus)
    P[1] = likelihood(data, Z, M, sigma_A, sigma_X, K_plus, num_objects, object_dim) + np.log(num_objects - np.sum(Z[:,k])) - np.log(num_objects)

    P = np.exp(P - np.max(P))

    if np.random.uniform(0,1) < (P[0]/(np.sum(P))):
        new_sample = 1
    else:
        new_sample = 0
    
    return(new_sample)

#### New_dishes Function

The New_dishes function samples the number of new features for observation $i$ using Gibbs Sampling.

In [7]:
def New_dishes(data, Z, sigma_X, sigma_A, K_plus, alpha, num_objects, object_dim, trunc_val,i):
    trunc = np.zeros(trunc_val)
    alpha_N = alpha/num_objects

    for k_i in range(0,trunc_val):
        Z_temp = Z
        if k_i>0:
            newcol = np.zeros((num_objects, k_i))
            newcol[i,:] = 1 
            Z_temp = np.column_stack((Z_temp, newcol))
        M = Mcalc(Z_temp, sigma_X, sigma_A, K_plus+k_i)
        trunc[k_i] = k_i * np.log(alpha_N) - alpha_N - np.log(np.math.factorial(k_i)) + likelihood(data, Z_temp, M, sigma_A, sigma_X, K_plus+k_i, num_objects, object_dim)

    trunc = np.exp(trunc - np.max(trunc))
    trunc = trunc/np.sum(trunc)

    p = np.random.uniform(0,1)
    t = 0
    new_dishes = 0

    for k_i in range(0,trunc_val):
        t = t + trunc[k_i]
        if p < t:
            new_dishes = k_i
            break
            
    return(new_dishes)

#### Met_sigma Function

The Met_sigma function updates both $\sigma_X$ and $\sigma_A$ using random-walk Metropolis-Hastings.

In [8]:
def Met_sigma(data, Z, sigma_X, sigma_A, K_plus, num_objects, object_dim):
    
    M = Mcalc(Z, sigma_X, sigma_A, K_plus)  
    lik_curr = likelihood(data, Z, M, sigma_A, sigma_X, K_plus, num_objects, object_dim)

    if np.random.uniform(0,1) < 0.5:
        sigma_X_new = sigma_X - np.random.uniform(0,1)/20
    else:
        sigma_X_new = sigma_X + np.random.uniform(0,1)/20

    M = Mcalc(Z, sigma_X_new, sigma_A, K_plus)
    lik_new_X = likelihood(data, Z, M, sigma_A, sigma_X_new, K_plus, num_objects, object_dim)

    acc_X = np.exp(min(0, lik_new_X - lik_curr))

    if np.random.uniform(0,1) < 0.5:
        sigma_A_new = sigma_A - np.random.uniform(0,1)/20
    else:
        sigma_A_new = sigma_A + np.random.uniform(0,1)/20

    M = Mcalc(Z, sigma_X, sigma_A_new, K_plus)
    lik_new_A = likelihood(data, Z, M, sigma_A_new, sigma_X, K_plus, num_objects, object_dim)

    acc_A = np.exp(min(0, lik_new_A - lik_curr))
    
    sigma_X_val=0
    sigma_A_val=0

    if np.random.uniform(0,1) < acc_X:
        sigma_X_val = sigma_X_new
    else:
        sigma_X_val = sigma_X
    
    if np.random.uniform(0,1) < acc_A:
        sigma_A_val = sigma_A_new
    else:
        sigma_A_val = sigma_A
        
    return list([sigma_X_val, sigma_A_val])

#### Sampler Function

The Sampler function combines all of the above functions to run the entirety of our MCMC algorithm. It outputs the posterior distibutions for Z, $K_+$, $\sigma_X$, $\sigma_A$, and $\alpha$. 

In [14]:
def Sampler(data, num_objects, object_dim, E=1000,  K_inf = 20, sigma_X = 1, sigma_A = 1, alpha = 1, trunc_val=5):
    #Set storage arrays for sampled parameters
    chain_Z = np.zeros([E, num_objects, K_inf])
    chain_K = np.zeros([E, 1])
    chain_sigma_X = np.zeros([E, 1])
    chain_sigma_A = np.zeros([E, 1])
    chain_alpha = np.zeros([E, 1])

    #Initialize parameter values
    num_object= np.shape(data)[0]
    object_dim = np.shape(data)[1]

    K_plus = 0
    while K_plus == 0:
        [Z, K_plus] = sampleIBP(alpha, num_objects)

    #Compute Harmonic Number
    HN = 0
    for i in range(0, num_objects):
        HN = HN + 1.0/(i+1)

    for e in range(0, E):
        #Store sampled values
        chain_Z[e, :, 0:K_plus] = Z[:, 0:K_plus]
        chain_K[e] = K_plus
        chain_sigma_X[e] = sigma_X
        chain_sigma_A[e] = sigma_A
        chain_alpha[e] = alpha

        #if (e%100==0):
        #    print(e)
        print("At iteration", e, ": K_plus is", K_plus, ", alpha is", alpha) 

        #Generate a new value for Z[i,k] and accept by Metropolis
        for i in range(0, num_objects):
            #First we remove singular features if any
            for k in range(0, K_plus):
                if (k>=K_plus):
                    break
                if(Z[i, k] > 0):
                    if (np.sum(Z[:, k]) - Z[i, k]) <= 0: 
                        Z[i, k] = 0
                        Z[:, k:(K_plus - 1)] = Z[:, (k+1):K_plus]
                        K_plus = K_plus - 1
                        Z = Z[:, 0:K_plus]
                        continue
                #Compute conditional distribution for current cell
                Z[i,k] = Met_zval(data, Z, sigma_X, sigma_A, K_plus, num_objects, object_dim, i, k)

            #Sample new dishes by Metropolis
            new_dishes = New_dishes(data, Z, sigma_X, sigma_A, K_plus, alpha, num_objects, object_dim, trunc_val,i)

            if(new_dishes > 0):
                newcol = np.zeros((num_objects, new_dishes))
                newcol[i,:] = 1
                Z = np.column_stack((Z, newcol))
            K_plus = K_plus + new_dishes

        #Sample sigma_X and sigma_A through Metropolis
        [sigma_X, sigma_A] = Met_sigma(data, Z, sigma_X, sigma_A, K_plus, num_objects, object_dim)
        #Sample alpha via Gibbs
        alpha = np.random.gamma(1 + K_plus, 1/(1+HN))
    
    print("Complete")
    return list([chain_Z, chain_K, chain_sigma_X, chain_sigma_A, chain_alpha])

In [15]:
dog = Sampler(image_data, num_objects, object_dim, E=100,  K_inf = 20, sigma_X = 1, sigma_A = 1, alpha = 1, trunc_val=5)

At iteration 0 : K_plus is 5 , alpha is 1
At iteration 1 : K_plus is 2 , alpha is 1.491787700240985
At iteration 2 : K_plus is 2 , alpha is 0.5755998482400341
At iteration 3 : K_plus is 2 , alpha is 0.22085176428487893
At iteration 4 : K_plus is 2 , alpha is 0.5890523479952683
At iteration 5 : K_plus is 2 , alpha is 0.2585216472795352
At iteration 6 : K_plus is 1 , alpha is 0.650067715283336
At iteration 7 : K_plus is 1 , alpha is 0.6686107997241004
At iteration 8 : K_plus is 1 , alpha is 0.15944916040307874
At iteration 9 : K_plus is 1 , alpha is 0.5371704847408318
At iteration 10 : K_plus is 1 , alpha is 0.2970079849217686
At iteration 11 : K_plus is 1 , alpha is 0.2035018579644125
At iteration 12 : K_plus is 1 , alpha is 0.17054524193403517
At iteration 13 : K_plus is 1 , alpha is 0.03805691883740289
At iteration 14 : K_plus is 1 , alpha is 0.0741276470726093
At iteration 15 : K_plus is 1 , alpha is 0.09752448508972106
At iteration 16 : K_plus is 1 , alpha is 0.5028424369727091
At i

### Profiling Our Sampler

In order to test the efficiency our code, we profile our Sampler function to find any potential bottlenecks in our code.

In [10]:
# Profiling our code for 100 iterations of our Sampler Function
import cProfile
import re
np.random.seed(1234)
cProfile.run('Sampler(image_data, num_objects, object_dim, E=100,  K_inf = 20, sigma_X = 1, sigma_A = 1, alpha = 1, trunc_val=5)', "profiling_unoptimized")

# Displaying results of our profiling
import pstats
p = pstats.Stats('profiling_unoptimized')
p.strip_dirs().sort_stats("cumulative").print_stats(10)
pass

Complete
Fri Apr 29 19:58:06 2016    profiling_unoptimized

         9027307 function calls in 61.586 seconds

   Ordered by: cumulative time
   List reduced from 65 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   61.586   61.586 {built-in method builtins.exec}
        1    0.000    0.000   61.586   61.586 <string>:1(<module>)
        1    0.264    0.264   61.586   61.586 <ipython-input-9-d51565b6ccfe>:1(Sampler)
   122948    8.445    0.000   44.037    0.000 <ipython-input-4-cf0b3993be3a>:2(likelihood)
    36324    1.702    0.000   36.025    0.001 <ipython-input-6-7997664e2c7c>:2(Met_zval)
    10000    1.412    0.000   24.966    0.002 <ipython-input-7-3b4495a7a593>:1(New_dishes)
   737688   24.683    0.000   24.683    0.000 {built-in method numpy.core.multiarray.dot}
   122948    1.972    0.000   11.339    0.000 <ipython-input-5-c62396fa41c4>:1(Mcalc)
   122948    3.927    0.000    6.649    0.000 linal

We can see from our profiling output above that the biggest bottleneck within our code is our likelihood function. We will focus our optimization efforts on this function going forward. We notice that we call the Mcalc function along with likelihood function in tandem several times throughout our Sampler function. To reduce the number of calls and to speed up our Sampler function, we move the calculation of the M matrix into our likelihood function. This leads us to a new function we call likelihood_opt. 

#### Likelihood_opt Function

In [11]:
## Optimized likelihood
# This function return the log likelihood
def likelihood_opt(X, Z, sigma_A, sigma_X, K_plus, num_objects, object_dim):
    #Calculate M
    M = np.dot(Z.T, Z) + (sigma_X**2/sigma_A**2)*np.eye(K_plus)
    
    part1 = (-1)*num_objects*(0.5*object_dim)*np.log(2*np.pi)
    part2 = (-1)*(num_objects-K_plus)* object_dim *np.log(sigma_X) 
    part3 = (-1)*object_dim*K_plus*np.log(sigma_A) 
    part4 = (-1)*(0.5*object_dim)* np.log(np.linalg.det(M)) 
    part5 = (-1/(2*sigma_X**2)) * np.trace(np.dot(np.dot(X.T,(np.identity(num_objects) - np.dot(np.dot(Z,np.linalg.inv(M)),Z.T))),X))
    total = part1+part2+part3+part4+part5
    return(total)

### Comparison of Likelihood Functions

We now compare our optimized likelihood function likelihood_opt to our unoptimized version to see if we get any significant speed up. 

In [12]:
import pandas as pd
import time

np.random.seed(1234)

# Setting values to use in likelihood function comparisons
num_objects = image_data.shape[0]
object_dim = image_data.shape[1]

sigma_X=1
sigma_A=1
alpha=1

Z, K_plus = sampleIBP(alpha,num_objects)

# Time the original likelihood function
loops = 1000
time_likelihood=np.zeros(loops)
for l in range(loops):
    t0=time.time()
    M = Mcalc(Z, sigma_X, sigma_A, K_plus)
    likelihood(image_data, Z, M, sigma_A, sigma_X, K_plus, num_objects, object_dim)
    t1=time.time()
    time_likelihood[l]=t1-t0
mean_time_likelihood= round(np.mean(time_likelihood),7)


# Time the optimized likelihood function
time_likelihood_opt = np.zeros(loops)
for l in range(loops):
    t0 = time.time()
    likelihood_opt(image_data, Z, sigma_A, sigma_X, K_plus, num_objects, object_dim)
    t1 = time.time()
    time_likelihood_opt[l] = t1-t0
mean_time_likelihood_opt = round(np.mean(time_likelihood_opt), 7)

time_array = np.array([mean_time_likelihood, mean_time_likelihood_opt])

cols = ["Time"]
index = ["Original Likelihood", "Optimized Likelihood"]

time_df = pd.DataFrame(time_array, columns = cols, index = index)
time_df

Unnamed: 0,Time
Original Likelihood,0.000455
Optimized Likelihood,0.000407


We can see that our optimized likelihood function produces a noticeable speed up over our original likelihood function. Over thousands of iterations, this small speed up should accumulate to produce a much faster sampler.

#### Sampler_opt Function

We rewrite our Sampler function with this new optimization technique.

In [None]:
def sampler_opt(data, num_objects, object_dim, E=1000,  K_inf = 20, sigma_X = 1, sigma_A = 1, alpha = 1, trunc_val=5):
    #Set storage arrays for sampled parameters
    chain_Z = np.zeros([E, num_objects, K_inf])
    chain_K = np.zeros([E, 1])
    chain_sigma_X = np.zeros([E, 1])
    chain_sigma_A = np.zeros([E, 1])
    chain_alpha = np.zeros([E, 1])

    #Initialize parameter values
    num_object= np.shape(data)[0]
    object_dim = np.shape(data)[1]
    [Z, K_plus] = sampleIBP(alpha, num_objects)

    #Compute Harmonic Number
    HN = 0
    for i in range(0, num_objects):
        HN = HN + 1.0/(i+1)

    for e in range(0, E):
        #Store sampled values
        chain_Z[e, :, 0:K_plus] = Z[:, 0:K_plus]
        chain_K[e] = K_plus
        chain_sigma_X[e] = sigma_X
        chain_sigma_A[e] = sigma_A
        chain_alpha[e] = alpha

        if (e%100==0):
            print(e)
        print("At iteration", e, ": K_plus is", K_plus, ", alpha is", alpha) 

        #Generate a new value for Z[i,k] and accept by Metropolis
        for i in range(0, num_objects):
            #First we remove singular features if any
            for k in range(0, K_plus):
                if (k>=K_plus):
                    break
                if(Z[i, k] > 0):
                    if (np.sum(Z[:, k]) - Z[i, k]) <= 0: 
                        Z[i, k] = 0
                        Z[:, k:(K_plus - 1)] = Z[:, (k+1):K_plus]
                        K_plus = K_plus - 1
                        Z = Z[:, 0:K_plus]
                        continue
                #Compute conditional distribution for current cell
                Z[i,k] = Met_zval(data, Z, sigma_X, sigma_A, K_plus, num_objects, object_dim, i, k)


            #Sample new dishes by Metropolis
            new_dishes = New_dishes(data, Z, sigma_X, sigma_A, K_plus, alpha, num_objects, object_dim, trunc_val,i)

            if(new_dishes > 0):
                newcol = np.zeros((num_objects, new_dishes))
                newcol[i,:] = 1
                Z = np.column_stack((Z, newcol))
            K_plus = K_plus + new_dishes

        #Sample sigma_X and sigma_A through Metropolis
        [sigma_X, sigma_A] = Met_sigma(data, Z, sigma_X, sigma_A, K_plus, num_objects, object_dim)
        #Sample alpha via Gibbs
        alpha = np.random.gamma(1 + K_plus, 1/(1+HN))
    
    print("Complete")
    return list([chain_Z, chain_K, chain_sigma_X, chain_sigma_A, chain_alpha])