In [123]:
%%file python_scripts/GenerateData.py
from __future__ import division
import numpy as np
import math
import os

# Simulate a data set from the linear-Gaussian latent feature model
# X = Z0.dot(A) + noise and store it, together with the ground truth, under Data/.
np.random.seed(1)  # fixed seed so the simulated data set is reproducible

N = 100  # number of objects
K = 4    # true number of features
D = 36   # dimension of feature (each feature is a flattened 6x6 image)

# Standard deviation of the Gaussian noise added to every observed pixel.
sigmaX0 = .5

# True feature matrix: one row per feature, each row a flattened binary image.
# The literal holds K*D entries, so it is reshaped with K rather than a
# hard-coded 4 to keep it tied to the constant declared above.
A = np.array((0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0)).reshape(K, D)

# Scaled identity used to turn standard-normal draws into noise with
# standard deviation sigmaX0.
I = (sigmaX0)*np.identity(D)
Z0 = np.zeros((N, K))
X = np.zeros((N, D))
for i in range(N):
    # Each feature is switched on with probability 1/2; redraw until the
    # object owns at least one feature.
    Z0[i,:] = (np.random.uniform(0,1,K) > .5).astype(int)
    while (np.sum(Z0[i,:]) == 0):
        Z0[i,:] = (np.random.uniform(0,1,K) > .5).astype(int)
    # Observation = noise + superposition of the features owned by object i.
    X[i,:] = np.random.normal(0,1, (1,D)).dot(I)+Z0[i,:].dot(A)

# Persist the data and the ground truth for the other scripts
# (np.save appends the .npy extension).
if not os.path.exists('Data'):
    os.makedirs('Data')
np.save("Data/SimulatedData", X)
np.save("Data/ZOriginal", Z0)
np.save("Data/AOriginal", A)

Writing python_scripts/GenerateData.py


In [124]:
%%file python_scripts/functions.py
#from __future__ import division
#np.random.seed(1)
#Sample prior
def sampleIBP(alpha, N, seed=1):
    """Draw a binary feature-assignment matrix from the Indian Buffet Process prior.

    Object 0 takes Poisson(alpha) features.  Every later object ``i``
    (0-based) takes each existing feature ``j`` with probability
    ``m_j/(i+1)``, where ``m_j`` is the number of earlier objects that own
    the feature, and then adds Poisson(alpha/(i+1)) brand-new features.

    Parameters
    ----------
    alpha : float
        IBP concentration parameter.
    N : int
        Number of objects (rows).
    seed : int or None, optional
        Value passed to ``np.random.seed`` before sampling.  The default of 1
        reproduces the historical behaviour of this function, which always
        reseeded the global generator; pass ``None`` to leave the global
        random state untouched.

    Returns
    -------
    tuple
        ``(Z, Kplus)`` where ``Z`` is an ``(N, Kplus)`` array of 0/1 values
        and ``Kplus`` is the number of sampled features.  A plain tuple is
        returned because packing an array and an int into ``np.array`` builds
        a ragged object array, which current NumPy rejects.
    """
    import numpy as np  # local import: this module has no top-level imports

    if seed is not None:
        np.random.seed(seed)

    if N < 1:
        return np.zeros((0, 0)), 0

    def _reserve(matrix, needed):
        # Widen the work buffer when more feature columns are required than
        # are currently allocated (at least doubling to keep growth cheap).
        width = matrix.shape[1]
        if needed <= width:
            return matrix
        extra = max(needed - width, width)
        return np.hstack((matrix, np.zeros((matrix.shape[0], extra))))

    result = np.zeros((N, 1000))
    t = np.random.poisson(alpha)
    if t>0:
        result = _reserve(result, t)
        result[0,0:t] = np.ones(t)
    Kplus = t
    for i in range(1,N):
        # Existing features: taken in proportion to their popularity so far.
        for j in range(Kplus):
            p = np.sum(result[0:i,j])/(i+1)
            if np.random.uniform(0,1) < p:
                result[i,j] = 1
        # New features owned (so far) only by object i.
        t = np.random.poisson(alpha/float(i+1))
        if t>0:
            result = _reserve(result, Kplus+t)
            result[i,Kplus:Kplus+t] = np.ones(t)
            Kplus = Kplus+t
    return result[:,0:Kplus], int(Kplus)

#Rank one update inverse calculation
def calcInverse(Z,M,i,k,val):
    """Update ``M = inv(Z.T.dot(Z) + c*I)`` after setting ``Z[i,k] = val``.

    Two Sherman-Morrison rank-one updates are applied: the first removes the
    contribution of the current row ``i`` from ``M``; the second adds the
    contribution of the modified row back.

    The row is a 1-D array, so the rank-one terms are built with ``np.outer``.
    The earlier ``z.T.dot(z.dot(M))`` form collapsed to a scalar and therefore
    did not compute the intended update.

    Parameters
    ----------
    Z : ndarray
        Feature-assignment matrix.  NOTE: modified in place (``Z[i,k] = val``).
    M : ndarray
        Inverse matching ``Z`` as it is on entry.  Not modified.
    i, k : int
        Row and column of the entry being changed.
    val : number
        New value for ``Z[i,k]``.

    Returns
    -------
    ndarray
        The inverse matching ``Z`` after the assignment.
    """
    import numpy as np  # local import: this module has no top-level imports

    # Downdate: remove the outer product of the current row from the inverse.
    row = np.array(Z[i,:], dtype=float)  # copy, so the edit below cannot alias it
    Mrow = M.dot(row)
    M_i = M - np.outer(Mrow, row.dot(M))/(row.dot(Mrow)-1)

    Z[i,k] = val

    # Update: add the outer product of the modified row back in.
    row = np.array(Z[i,:], dtype=float)
    Mrow = M_i.dot(row)
    Inv = M_i - np.outer(Mrow, row.dot(M_i))/(row.dot(Mrow)+1)
    return Inv

#Original likelihood function
def ll0(X, Z, sigmaX, sigmaA, K, D, N):
    """Baseline log-likelihood of X given Z with the feature weights integrated out.

    This is the straightforward transcription of the formula: the regularised
    Gram matrix ``Z.T.dot(Z) + (sigmaX**2/sigmaA**2)*I`` is assembled once for
    the determinant and once more for the inverse.  It is kept as the
    reference implementation that the faster ``ll`` is timed against.

    X is used as an N-row data matrix and Z as an N-row matrix with K columns;
    K, D and N are the sizes entering the normalising terms.
    """
    import numpy as np

    noise_ratio = sigmaX**2/sigmaA**2

    # Normalising constants.
    const_term = (-1)*np.log(2*np.pi)*N*D*.5
    sigmaX_term = np.log(sigmaX)*(N-K)*D
    sigmaA_term = np.log(sigmaA)*K*D

    # Log-determinant term (Gram matrix built for the first time).
    logdet_term = .5*D*np.log(np.linalg.det(Z.T.dot(Z)+noise_ratio*np.identity(K)))

    # Quadratic term (Gram matrix rebuilt, then inverted).
    gram_inverse = np.linalg.inv(Z.T.dot(Z)+noise_ratio*np.identity(K))
    residual_maker = np.identity(N) - Z.dot(gram_inverse.dot(Z.T))
    quad_term = .5/(sigmaX**2)*np.trace((X.T.dot(residual_maker)).dot(X))

    return const_term - sigmaX_term - sigmaA_term - logdet_term - quad_term

#Improved Likelihood function
def ll(X, Z, sigmaX, sigmaA, K, D, N):
    """Log-likelihood of X given Z with the feature weights integrated out.

    Evaluates the same quantity as ``ll0``::

        -N*D/2*log(2*pi) - (N-K)*D*log(sigmaX) - K*D*log(sigmaA)
        - D/2*log|M| - 1/(2*sigmaX**2) * tr(X.T (I - Z M^-1 Z.T) X)

    with ``M = Z.T.dot(Z) + (sigmaX**2/sigmaA**2)*I``.

    Differences from the literal formula:

    * ``log|M|`` comes from ``np.linalg.slogdet`` so that it stays finite
      when the determinant itself would overflow or underflow a float.
    * the trace is expanded to ``tr(X.T X) - tr((Z.T X).T M^-1 (Z.T X))``,
      which avoids forming any N-by-N matrix and uses a linear solve instead
      of an explicit inverse.

    Parameters
    ----------
    X : ndarray
        Data matrix with N rows.
    Z : ndarray
        Feature-assignment matrix with N rows and K columns.
    sigmaX, sigmaA : float
        Noise and feature-weight standard deviations.  A non-positive value
        makes ``np.log`` return nan/-inf as before; ``sigmaA == 0`` with
        plain Python numbers raises ``ZeroDivisionError`` as before.
    K, D, N : int
        Sizes used in the normalising terms.

    Returns
    -------
    float
    """
    import numpy as np  # local import: this module has no top-level imports

    M = Z.T.dot(Z)+(sigmaX**2/sigmaA**2)*np.identity(K)

    sign, logdet = np.linalg.slogdet(M)
    if sign < 0:
        # Same outcome as taking the log of a negative determinant.
        logdet = np.nan

    ZtX = Z.T.dot(X)
    quad = np.sum(X*X)
    if ZtX.size:
        quad = quad - np.sum(ZtX*np.linalg.solve(M, ZtX))

    return (-1)*np.log(2*np.pi)*N*D*.5 - np.log(sigmaX)*(N-K)*D - np.log(sigmaA)*K*D - .5*D*logdet \
        -.5/(sigmaX**2)*quad

Writing python_scripts/functions.py


In [125]:
%%file python_scripts/sampler0.py
from __future__ import division
import numpy as np
import math
from functions import sampleIBP
from functions import ll0 as ll

def sampler0(X, niter, BurnIn, sigmaX, sigmaA,alpha, N, D, maxNew):
    """Gibbs/Metropolis sampler for the linear-Gaussian IBP model (baseline likelihood).

    Every iteration
      1. resamples each entry ``Z[i,k]`` from its conditional posterior,
         dropping features that are owned by the current object only;
      2. for each object, draws a number of new features in
         ``0 .. maxNew-1``;
      3. performs one random-walk Metropolis step on ``sigmaX`` and one on
         ``sigmaA``;
      4. draws ``alpha`` from its Gamma conditional.

    The state at the *start* of iteration ``j`` is recorded once
    ``j + 1 > BurnIn``.

    Parameters
    ----------
    X : ndarray
        Data matrix handed to the likelihood (N rows).
    niter, BurnIn : int
        Total number of iterations and number of initial iterations that are
        not recorded.
    sigmaX, sigmaA, alpha : float
        Starting values of the hyper-parameters.
    N, D : int
        Number of objects and the data dimension passed on to the likelihood.
    maxNew : int
        Number of candidate counts for new features per object.

    Returns
    -------
    tuple
        ``(chainZ, chainK, chainSigmaX, chainSigmaA, chainAlpha)``.  Earlier
        versions returned ``None`` and discarded the chains.
    """
    np.random.seed(1)
    # N-th harmonic number, used in the Gamma conditional of alpha.
    HN = 0.
    for i in range(1,N+1):
        HN += 1./i

    SampleSize=niter-BurnIn

    K_inf=20  # feature columns initially reserved in chainZ

    chainZ=np.zeros((SampleSize,N,K_inf))
    chainK=np.zeros((SampleSize,1))
    chainSigmaX=np.zeros((SampleSize,1))
    chainSigmaA=np.zeros((SampleSize,1))
    chainAlpha=np.zeros((SampleSize,1))
    Z, Kplus = sampleIBP(alpha, N)
    s_counter=0

    for j in range(niter):
        if((j+1)>BurnIn):
            if Kplus > chainZ.shape[2]:
                # More features than reserved columns: widen the store
                # instead of failing on the assignment below.
                padding = np.zeros((SampleSize, N, Kplus-chainZ.shape[2]))
                chainZ = np.concatenate((chainZ, padding), axis=2)
            chainZ[s_counter,:,0:Kplus]=Z
            chainK[s_counter]=Kplus
            chainSigmaX[s_counter]=sigmaX
            chainSigmaA[s_counter]=sigmaA
            chainAlpha[s_counter]=alpha
            s_counter=s_counter+1

        for i in range(N):
            for k in range(Kplus):
                # Kplus can shrink inside this loop, so re-check the bound.
                if k>=Kplus:
                    break
                #Removing the singular features, i.e. the ones that have 1 for the current object only.
                if Z[i,k] > 0:
                    if (np.sum(Z[:,k])- 1) <=0:
                        Z = np.delete(Z, k, axis=1)
                        Kplus = Kplus-1
                        continue #We're no longer looking at this feature, so move to another one

                P = np.zeros(2)
                #set Z[i,k] = 0 and calculate posterior probability
                Z[i,k] = 0
                P[0] = ll(X, Z, sigmaX, sigmaA, Kplus, D, N) + np.log(N-np.sum(Z[:,k])) - np.log(N)

                #set Z[i,k] = 1 and calculate posterior probability
                Z[i,k] = 1
                P[1] = ll(X, Z,sigmaX, sigmaA, Kplus, D, N)  + np.log(np.sum(Z[:,k])- 1) - np.log(N)

                # Subtract the max before exponentiating to avoid underflow.
                P = np.exp(P - max(P))
                U = np.random.uniform(0,1)
                if U<(P[1]/(np.sum(P))):
                    Z[i,k] = 1
                else:
                    Z[i,k] = 0


            #Sample number of new features
            prob = np.zeros(maxNew)
            alphaN = alpha/N
            for kNew in range(maxNew): # candidate counts 0 .. maxNew-1
                Z_temp = Z
                if kNew>0:
                    addCols = np.zeros((N,kNew))
                    addCols[i,:] = 1
                    Z_temp = np.hstack((Z_temp, addCols))

                # log Poisson(kNew; alpha/N) prior plus log-likelihood.
                pois = kNew*np.log(alphaN) - alphaN - np.log(math.factorial(kNew))
                lik = ll(X = X, Z = Z_temp, sigmaX = sigmaX, sigmaA = sigmaA, K=(Kplus+kNew), D= D, N= N)
                prob[kNew] = pois + lik

            #normalize prob
            prob = np.exp(prob - max(prob))
            prob = prob/sum(prob)

            # Inverse-CDF draw from the discrete distribution `prob`.
            U = np.random.uniform(0,1,1)
            p = 0
            kNew=0
            for new in range(maxNew):
                p = p+prob[new]
                if U<p:
                    kNew = new
                    break

            #Add kNew new columns to Z and set the values at ith row to 1 for all of them
            if kNew>0:
                addCols = np.zeros((N,kNew))
                addCols[i,:] = 1
                Z = np.hstack((Z, addCols))
            Kplus = Kplus + kNew

        llCurrent = ll(X, Z, sigmaX, sigmaA, Kplus, D, N )
        #update sigmaX
        if np.random.uniform(0,1) < .5:
            sigmaX_new = sigmaX - np.random.uniform(0,1)/20
        else:
            sigmaX_new = sigmaX + np.random.uniform(0,1)/20

        # A non-positive proposal, or a NaN log-ratio, is rejected.
        # (min(0, nan) evaluates to 0 and would otherwise accept it.)
        arX = 0.
        if sigmaX_new > 0:
            llNew = ll(X, Z, sigmaX_new, sigmaA, Kplus, D, N)
            if not np.isnan(llNew-llCurrent):
                arX = np.exp(min(0,llNew-llCurrent))
        U = np.random.uniform(0,1)
        if U < arX:
            sigmaX = sigmaX_new
            # Keep the reference likelihood in step with the accepted value
            # so the sigmaA ratio below compares like with like.
            llCurrent = llNew

        #update sigmaA
        if np.random.uniform(0,1) < .5:
            sigmaA_new = sigmaA - np.random.uniform(0,1)/20
        else:
            sigmaA_new = sigmaA + np.random.uniform(0,1)/20

        arA = 0.
        if sigmaA_new > 0:
            llNew = ll(X, Z, sigmaX, sigmaA_new, Kplus, D, N)
            if not np.isnan(llNew-llCurrent):
                arA = np.exp(min(0,llNew-llCurrent))
        U = np.random.uniform(0,1)
        if U < arA:
            sigmaA = sigmaA_new

        alpha = np.random.gamma(1+Kplus, 1/(1+HN))

    return chainZ, chainK, chainSigmaX, chainSigmaA, chainAlpha

Writing python_scripts/sampler0.py


In [126]:
%%file python_scripts/sampler.py
from __future__ import division
import numpy as np
import math
from functions import sampleIBP, ll
import os

def sampler(X, niter, BurnIn, sigmaX, sigmaA, alpha, N, D, maxNew):
    """Gibbs/Metropolis sampler for the linear-Gaussian IBP model.

    Every iteration
      1. resamples each entry ``Z[i,k]`` from its conditional posterior,
         dropping features that are owned by the current object only;
      2. for each object, draws a number of new features in
         ``0 .. maxNew-1``;
      3. performs one random-walk Metropolis step on ``sigmaX`` and one on
         ``sigmaA``;
      4. draws ``alpha`` from its Gamma conditional.

    The state at the *start* of iteration ``j`` is recorded once
    ``j + 1 > BurnIn``.  After the last iteration the recorded chains are
    written to ``Data/chainZ.npy``, ``Data/chainK.npy``,
    ``Data/chainSigmaX.npy``, ``Data/chainSigmaA.npy`` and
    ``Data/chainAlpha.npy`` (the directory is created when missing).

    Parameters
    ----------
    X : ndarray
        Data matrix handed to the likelihood (N rows).
    niter, BurnIn : int
        Total number of iterations and number of initial iterations that are
        not recorded.
    sigmaX, sigmaA, alpha : float
        Starting values of the hyper-parameters.
    N, D : int
        Number of objects and the data dimension passed on to the likelihood.
    maxNew : int
        Number of candidate counts for new features per object.

    Returns
    -------
    tuple
        ``(chainZ, chainK, chainSigmaX, chainSigmaA, chainAlpha)``, the same
        arrays that are saved.  Earlier versions returned ``None``.
    """
    np.random.seed(1)
    # N-th harmonic number, used in the Gamma conditional of alpha.
    HN = 0.
    for i in range(1,N+1):
        HN += 1./i

    SampleSize=niter-BurnIn

    K_inf=20  # feature columns initially reserved in chainZ

    chainZ=np.zeros((SampleSize,N,K_inf))
    chainK=np.zeros((SampleSize,1))
    chainSigmaX=np.zeros((SampleSize,1))
    chainSigmaA=np.zeros((SampleSize,1))
    chainAlpha=np.zeros((SampleSize,1))
    Z, Kplus = sampleIBP(alpha, N)
    s_counter=0

    for j in range(niter):
        if((j+1)>BurnIn):
            if Kplus > chainZ.shape[2]:
                # More features than reserved columns: widen the store
                # instead of failing on the assignment below.
                padding = np.zeros((SampleSize, N, Kplus-chainZ.shape[2]))
                chainZ = np.concatenate((chainZ, padding), axis=2)
            chainZ[s_counter,:,0:Kplus]=Z
            chainK[s_counter]=Kplus
            chainSigmaX[s_counter]=sigmaX
            chainSigmaA[s_counter]=sigmaA
            chainAlpha[s_counter]=alpha
            s_counter=s_counter+1

        for i in range(N):
            for k in range(Kplus):
                # Kplus can shrink inside this loop, so re-check the bound.
                if k>=Kplus:
                    break
                #Removing the singular features, i.e. the ones that have 1 for the current object only.
                if Z[i,k] > 0:
                    if (np.sum(Z[:,k])- 1) <=0:
                        Z = np.delete(Z, k, axis=1)
                        Kplus = Kplus-1
                        continue #We're no longer looking at this feature, so move to another one

                P = np.zeros(2)
                #set Z[i,k] = 0 and calculate posterior probability
                Z[i,k] = 0
                P[0] = ll(X, Z, sigmaX, sigmaA, Kplus, D, N) + np.log(N-np.sum(Z[:,k])) - np.log(N)

                #set Z[i,k] = 1 and calculate posterior probability
                Z[i,k] = 1
                P[1] = ll(X, Z,sigmaX, sigmaA, Kplus, D, N)  + np.log(np.sum(Z[:,k])- 1) - np.log(N)

                # Subtract the max before exponentiating to avoid underflow.
                P = np.exp(P - max(P))
                U = np.random.uniform(0,1)
                if U<(P[1]/(np.sum(P))):
                    Z[i,k] = 1
                else:
                    Z[i,k] = 0


            #Sample number of new features
            prob = np.zeros(maxNew)
            alphaN = alpha/N
            for kNew in range(maxNew): # candidate counts 0 .. maxNew-1
                Z_temp = Z
                if kNew>0:
                    addCols = np.zeros((N,kNew))
                    addCols[i,:] = 1
                    Z_temp = np.hstack((Z_temp, addCols))

                # log Poisson(kNew; alpha/N) prior plus log-likelihood.
                pois = kNew*np.log(alphaN) - alphaN - np.log(math.factorial(kNew))
                lik = ll(X = X, Z = Z_temp, sigmaX = sigmaX, sigmaA = sigmaA, K=(Kplus+kNew), D= D, N= N)
                prob[kNew] = pois + lik

            #normalize prob
            prob = np.exp(prob - max(prob))
            prob = prob/sum(prob)

            # Inverse-CDF draw from the discrete distribution `prob`.
            U = np.random.uniform(0,1,1)
            p = 0
            kNew=0
            for new in range(maxNew):
                p = p+prob[new]
                if U<p:
                    kNew = new
                    break

            #Add kNew new columns to Z and set the values at ith row to 1 for all of them
            if kNew>0:
                addCols = np.zeros((N,kNew))
                addCols[i,:] = 1
                Z = np.hstack((Z, addCols))
            Kplus = Kplus + kNew

        llCurrent = ll(X, Z, sigmaX, sigmaA, Kplus, D, N )
        #update sigmaX
        if np.random.uniform(0,1) < .5:
            sigmaX_new = sigmaX - np.random.uniform(0,1)/20
        else:
            sigmaX_new = sigmaX + np.random.uniform(0,1)/20

        # A non-positive proposal, or a NaN log-ratio, is rejected.
        # (min(0, nan) evaluates to 0 and would otherwise accept it.)
        arX = 0.
        if sigmaX_new > 0:
            llNew = ll(X, Z, sigmaX_new, sigmaA, Kplus, D, N)
            if not np.isnan(llNew-llCurrent):
                arX = np.exp(min(0,llNew-llCurrent))
        U = np.random.uniform(0,1)
        if U < arX:
            sigmaX = sigmaX_new
            # Keep the reference likelihood in step with the accepted value
            # so the sigmaA ratio below compares like with like.
            llCurrent = llNew

        #update sigmaA
        if np.random.uniform(0,1) < .5:
            sigmaA_new = sigmaA - np.random.uniform(0,1)/20
        else:
            sigmaA_new = sigmaA + np.random.uniform(0,1)/20

        arA = 0.
        if sigmaA_new > 0:
            llNew = ll(X, Z, sigmaX, sigmaA_new, Kplus, D, N)
            if not np.isnan(llNew-llCurrent):
                arA = np.exp(min(0,llNew-llCurrent))
        U = np.random.uniform(0,1)
        if U < arA:
            sigmaA = sigmaA_new

        alpha = np.random.gamma(1+Kplus, 1/(1+HN))

    # Persist the recorded chains for the plotting and test scripts.
    if not os.path.exists('Data'):
        os.makedirs('Data')
    np.save("Data/chainZ", chainZ)
    np.save("Data/chainK",chainK)
    np.save("Data/chainSigmaX", chainSigmaX)
    np.save("Data/chainSigmaA",chainSigmaA)
    np.save("Data/chainAlpha", chainAlpha)

    return chainZ, chainK, chainSigmaX, chainSigmaA, chainAlpha

Writing python_scripts/sampler.py


In [127]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [128]:
%%file python_scripts/Cython_setup.py
from distutils.core import setup
from Cython.Build import cythonize

# Translate Cython_functions.pyx into extension-module definitions, then hand
# them to the build machinery.
cython_extensions = cythonize("Cython_functions.pyx")

setup(ext_modules=cython_extensions)

Writing python_scripts/Cython_setup.py


In [129]:
%%file python_scripts/Cython_functions.pyx
import numpy as np
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
def ll(X, Z, sigmaX, sigmaA, K, D, N):
    """Log-likelihood of X given Z with the feature weights integrated out.

    Evaluates::

        -N*D/2*log(2*pi) - (N-K)*D*log(sigmaX) - K*D*log(sigmaA)
        - D/2*log|M| - 1/(2*sigmaX**2) * tr(X.T (I - Z M^-1 Z.T) X)

    with ``M = Z.T.dot(Z) + (sigmaX**2/sigmaA**2)*I``.

    ``log|M|`` is obtained from ``np.linalg.slogdet`` so that it stays finite
    when the determinant itself would overflow, and the trace is expanded to
    ``tr(X.T X) - tr((Z.T X).T M^-1 (Z.T X))`` so that no N-by-N matrix and
    no explicit inverse is formed.

    X is an N-row data matrix, Z an N-row matrix with K columns; K, D and N
    are the sizes used in the normalising terms.  Returns a float.
    """
    M = Z.T.dot(Z)+(sigmaX**2/sigmaA**2)*np.identity(K)

    sign, logdet = np.linalg.slogdet(M)
    if sign < 0:
        # Same outcome as taking the log of a negative determinant.
        logdet = np.nan

    ZtX = Z.T.dot(X)
    quad = np.sum(X*X)
    if ZtX.size:
        quad = quad - np.sum(ZtX*np.linalg.solve(M, ZtX))

    return (-1)*np.log(2*np.pi)*N*D*.5 - np.log(sigmaX)*(N-K)*D - np.log(sigmaA)*K*D - .5*D*logdet \
        -.5/(sigmaX**2)*quad

# Seed the global generator once, when the module is imported.
np.random.seed(1)
def sampleIBP(alpha, N):
    """Draw a binary feature-assignment matrix from the Indian Buffet Process prior.

    Object 0 takes Poisson(alpha) features.  Every later object ``i``
    (0-based) takes each existing feature ``j`` with probability
    ``m_j/(i+1)``, where ``m_j`` is the number of earlier objects that own
    the feature, and then adds Poisson(alpha/(i+1)) brand-new features.

    Parameters
    ----------
    alpha : float
        IBP concentration parameter.
    N : int
        Number of objects (rows).

    Returns
    -------
    tuple
        ``(Z, Kplus)`` where ``Z`` is an ``(N, Kplus)`` array of 0/1 values
        and ``Kplus`` is the number of sampled features.  A plain tuple is
        returned because packing an array and an int into ``np.array`` builds
        a ragged object array, which current NumPy rejects.
    """
    if N < 1:
        return np.zeros((0, 0)), 0

    def _reserve(matrix, needed):
        # Widen the work buffer when more feature columns are required than
        # are currently allocated (at least doubling to keep growth cheap).
        width = matrix.shape[1]
        if needed <= width:
            return matrix
        extra = max(needed - width, width)
        return np.hstack((matrix, np.zeros((matrix.shape[0], extra))))

    result = np.zeros((N, 1000))
    t = np.random.poisson(alpha)
    if t>0:
        result = _reserve(result, t)
        result[0,0:t] = np.ones(t)
    Kplus = t
    for i in range(1,N):
        # Existing features: taken in proportion to their popularity so far.
        for j in range(Kplus):
            p = np.sum(result[0:i,j])/(i+1)
            if np.random.uniform(0,1) < p:
                result[i,j] = 1
        # New features owned (so far) only by object i.
        t = np.random.poisson(alpha/float(i+1))
        if t>0:
            result = _reserve(result, Kplus+t)
            result[i,Kplus:Kplus+t] = np.ones(t)
            Kplus = Kplus+t
    return result[:,0:Kplus], int(Kplus)

Writing python_scripts/Cython_functions.pyx


In [130]:
%%file python_scripts/sampler_cy.py
import numpy as np
import scipy as sp
import math
import Cython_functions as func

def sampler_cy(X, niter, BURN_IN, sigmaX, sigmaA,alpha, N, D, maxNew):
    """Gibbs/Metropolis sampler for the linear-Gaussian IBP model (Cython helpers).

    Same algorithm as the pure-Python samplers, with the likelihood and the
    prior draw taken from the compiled ``Cython_functions`` module.

    Every iteration
      1. resamples each entry ``Z[i,k]`` from its conditional posterior,
         dropping features that are owned by the current object only;
      2. for each object, draws a number of new features in
         ``0 .. maxNew-1``;
      3. performs one random-walk Metropolis step on ``sigmaX`` and one on
         ``sigmaA``;
      4. draws ``alpha`` from its Gamma conditional.

    The state at the *start* of iteration ``j`` is recorded once
    ``j + 1 > BURN_IN``.

    Parameters
    ----------
    X : ndarray
        Data matrix handed to the likelihood (N rows).
    niter, BURN_IN : int
        Total number of iterations and number of initial iterations that are
        not recorded.
    sigmaX, sigmaA, alpha : float
        Starting values of the hyper-parameters.
    N, D : int
        Number of objects and the data dimension passed on to the likelihood.
    maxNew : int
        Number of candidate counts for new features per object.

    Returns
    -------
    tuple
        ``(chainZ, chainK, chainSigma_X, chainSigma_A, chainAlpha)``.
    """
    # N-th harmonic number, used in the Gamma conditional of alpha.
    HN = 0.
    for i in range(1,N+1):
        HN += 1./i

    # The parameter is spelled BURN_IN; the former reference to `BurnIn`
    # raised NameError.
    SampleSize=niter-BURN_IN

    K_inf=20  # feature columns initially reserved in chainZ

    chainZ=np.zeros((SampleSize,N,K_inf))
    chainK=np.zeros((SampleSize,1))
    chainSigma_X=np.zeros((SampleSize,1))
    chainSigma_A=np.zeros((SampleSize,1))
    chainAlpha=np.zeros((SampleSize,1))
    np.random.seed(1)
    Z, Kplus = func.sampleIBP(alpha, N)
    s_counter=0

    for j in range(niter):
        #update z
        if((j+1)>BURN_IN):
            if Kplus > chainZ.shape[2]:
                # More features than reserved columns: widen the store
                # instead of failing on the assignment below.
                padding = np.zeros((SampleSize, N, Kplus-chainZ.shape[2]))
                chainZ = np.concatenate((chainZ, padding), axis=2)
            # Write into the arrays allocated above (the former chain_Z,
            # chain_K, ... names were never defined).
            chainZ[s_counter,:,0:Kplus]=Z
            chainK[s_counter]=Kplus
            chainSigma_X[s_counter]=sigmaX
            chainSigma_A[s_counter]=sigmaA
            chainAlpha[s_counter]=alpha
            s_counter=s_counter+1

        for i in range(N):
            for k in range(Kplus):
                # Kplus can shrink inside this loop, so re-check the bound.
                if k>=Kplus:
                    break
                #Removing the singular features, i.e. the ones that have 1 for the current object only.
                if Z[i,k] > 0:
                    if (np.sum(Z[:,k])- 1) <=0:
                        Z = np.delete(Z, k, axis=1)
                        Kplus = Kplus-1
                        continue #We're no longer looking at this feature, so move to another one

                P = np.zeros(2)
                #set Z[i,k] = 0 and calculate posterior probability
                Z[i,k] = 0
                P[0] = func.ll(X, Z, sigmaX, sigmaA, Kplus, D, N) + np.log(N-np.sum(Z[:,k])) - np.log(N)

                #set Z[i,k] = 1 and calculate posterior probability
                Z[i,k] = 1
                P[1] = func.ll(X, Z,sigmaX, sigmaA, Kplus, D, N)  + np.log(np.sum(Z[:,k])- 1) - np.log(N)

                # Subtract the max before exponentiating to avoid underflow.
                P = np.exp(P - max(P))
                U = np.random.uniform(0,1)
                if U<(P[1]/(np.sum(P))):
                    Z[i,k] = 1
                else:
                    Z[i,k] = 0


            #Sample number of new features
            prob = np.zeros(maxNew)
            # float(): this module does not enable true division.
            alphaN = alpha/float(N)
            for kNew in range(maxNew): # candidate counts 0 .. maxNew-1
                Z_temp = Z
                if kNew>0:
                    addCols = np.zeros((N,kNew))
                    addCols[i,:] = 1
                    Z_temp = np.hstack((Z_temp, addCols))

                # log Poisson(kNew; alpha/N) prior plus log-likelihood.
                pois = kNew*np.log(alphaN) - alphaN - np.log(math.factorial(kNew))
                lik = func.ll(X = X, Z = Z_temp, sigmaX = sigmaX, sigmaA = sigmaA, K=(Kplus+kNew), D= D, N= N)
                prob[kNew] = pois + lik

            #normalize prob
            prob = np.exp(prob - max(prob))
            prob = prob/sum(prob)

            # Inverse-CDF draw from the discrete distribution `prob`.
            U = np.random.uniform(0,1,1)
            p = 0
            kNew=0
            for new in range(maxNew):
                p = p+prob[new]
                if U<p:
                    kNew = new
                    break

            #Add kNew new columns to Z and set the values at ith row to 1 for all of them
            if kNew>0:
                addCols = np.zeros((N,kNew))
                addCols[i,:] = 1
                Z = np.hstack((Z, addCols))
            Kplus = Kplus + kNew

        llCurrent = func.ll(X, Z, sigmaX, sigmaA, Kplus, D, N )
        #update sigmaX
        if np.random.uniform(0,1) < .5:
            sigmaX_new = sigmaX - np.random.uniform(0,1)/20
        else:
            sigmaX_new = sigmaX + np.random.uniform(0,1)/20

        # A non-positive proposal, or a NaN log-ratio, is rejected.
        # (min(0, nan) evaluates to 0 and would otherwise accept it.)
        arX = 0.
        if sigmaX_new > 0:
            llNew = func.ll(X, Z, sigmaX_new, sigmaA, Kplus, D, N)
            if not np.isnan(llNew-llCurrent):
                arX = np.exp(min(0,llNew-llCurrent))
        U = np.random.uniform(0,1)
        if U < arX:
            sigmaX = sigmaX_new
            # Keep the reference likelihood in step with the accepted value
            # so the sigmaA ratio below compares like with like.
            llCurrent = llNew

        #update sigma_A
        if np.random.uniform(0,1) < .5:
            sigmaA_new = sigmaA - np.random.uniform(0,1)/20
        else:
            sigmaA_new = sigmaA + np.random.uniform(0,1)/20

        arA = 0.
        if sigmaA_new > 0:
            llNew = func.ll(X, Z, sigmaX, sigmaA_new, Kplus, D, N)
            if not np.isnan(llNew-llCurrent):
                arA = np.exp(min(0,llNew-llCurrent))

        U = np.random.uniform(0,1)
        if U < arA:
            sigmaA = sigmaA_new

        alpha = np.random.gamma(1+Kplus, 1/(1+HN))

    return chainZ, chainK, chainSigma_X, chainSigma_A, chainAlpha

Writing python_scripts/sampler_cy.py


In [131]:
%%file python_scripts/compareSampler.py
from functions import ll0, ll
from sampler import sampler
from sampler0 import sampler0
from sampler_cy import sampler_cy
import numpy as np
import pandas as pd
import time
import os

# Time the sampler variants on the simulated data and write the wall-clock
# totals to latex_tables/Runtimes.tex.
np.random.seed(1)
X=np.load('Data/SimulatedData.npy')
N=X.shape[0]
D=X.shape[1]
sigmaX=1.
sigmaA=1.
alpha=1.
maxNew=4
niter=1000
BurnIn = 0


def _timeSampler(run):
    """Return the wall-clock seconds of one ``run`` call with the settings above."""
    t0 = time.time()
    run(X, niter, BurnIn, sigmaX, sigmaA, alpha, N, D, maxNew)
    return time.time() - t0


elap1 = _timeSampler(sampler0)
elap2 = _timeSampler(sampler)
# NOTE(review): the row labelled 'Cythonized' below is produced by timing
# `sampler` a second time; `sampler_cy` is imported above but never called.
# Point this call at `sampler_cy` once that function is confirmed to run.
elap3 = _timeSampler(sampler)


columns = ['Total Time']
index = ['Initial Code','Improved ll','Cythonized']

if not os.path.exists('latex_tables'):
    os.makedirs('latex_tables')

df = pd.DataFrame(np.array([elap1, elap2, elap3]),columns=columns,index=index)
tab = df.to_latex()
# `with` closes the file even when the write fails.
with open("latex_tables/Runtimes.tex", "w") as text_file:
    text_file.write(tab)

Writing python_scripts/compareSampler.py


In [132]:
%%file python_scripts/compareInverse.py
import time
import numpy as np
import pandas as pd
from functions import calcInverse, sampleIBP
import os
# Compare the mean run time of the rank-one inverse update (calcInverse) with
# a full np.linalg.inv and write the result to latex_tables/inverseMethods.tex.
np.random.seed(1)

X=np.load('Data/SimulatedData.npy')
N=X.shape[0]
D=X.shape[1]
sigmaX=1.
sigmaA=1.
alpha=1.

# Entry of Z that the rank-one update is asked to change.
# NOTE(review): k=3 assumes the prior draw below yields at least 4 features.
i=10
k=3

Z,K = sampleIBP(alpha,N)

M = np.linalg.inv(Z.T.dot(Z)+(sigmaX**2/sigmaA**2)*np.identity(K))
Z[i,k] = 1
val = 0
loops = 1000


def _meanRuntime(call, repeats):
    """Mean wall-clock seconds of ``call()`` over ``repeats`` runs, rounded to 7 decimals."""
    elapsed = np.zeros(repeats)
    for run in range(repeats):
        t0 = time.time()
        call()
        t1 = time.time()
        elapsed[run] = t1-t0
    return round(np.mean(elapsed),7)


# calcInverse writes `val` into Z[i,k] on its first call; only the run time
# is measured here, the returned matrices are discarded.
mtcalcInv = _meanRuntime(lambda: calcInverse(Z,M,i,k,val), loops)

mtlinalgInv = _meanRuntime(
    lambda: np.linalg.inv(Z.T.dot(Z)+(sigmaX**2/sigmaA**2)*np.identity(K)), loops)


times = np.array((mtlinalgInv,mtcalcInv))

columns = ['Time']
index = ['linalg.inverse','calcInverse']

if not os.path.exists('latex_tables'):
    os.makedirs('latex_tables')
df = pd.DataFrame(times,columns=columns,index=index)
tab = df.to_latex()
# `with` closes the file even when the write fails.
with open("latex_tables/inverseMethods.tex", "w") as text_file:
    text_file.write(tab)



Writing python_scripts/compareInverse.py


In [133]:
%%file python_scripts/createPlots.py
# Plotting set-up. The backend and style calls are interleaved with the
# imports on purpose: the backend is chosen before pyplot is imported.
import matplotlib
matplotlib.use('Agg') 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# NOTE(review): `Image` is not referenced anywhere in the visible part of this
# script — confirm whether this import is still needed.
import Image
import matplotlib.cm as cm
from matplotlib import gridspec
from matplotlib import rcParams
# Let matplotlib adjust subplot spacing automatically for every figure.
rcParams.update({'figure.autolayout': True})
import os


# Every figure produced by this script is written below figures/.
if not os.path.exists('figures'):
    os.makedirs('figures')

# Posterior samples written by the sampler.
chainZ = np.load("Data/chainZ.npy")
chainK = np.load("Data/chainK.npy")
chainSigmaX = np.load("Data/chainSigmaX.npy")
chainSigmaA = np.load("Data/chainSigmaA.npy")
chainAlpha = np.load("Data/chainAlpha.npy")
# Feature assignments of the last recorded sample.
Z = chainZ[-1]

# Simulated observations and the ground truth they were generated from.
X0 = np.load("Data/SimulatedData.npy")
Z0 = np.load("Data/ZOriginal.npy")
A0 = np.load("Data/AOriginal.npy")

# Figure: (a) histogram of the sampled number of features and
# (b) histogram of the number of features per object in the last sample.
plt.figure(num=None, figsize=(12,3), dpi=80, facecolor='w', edgecolor='k')

left_axes = plt.subplot(1, 2, 1)
left_axes.set_xlabel(r'(a) $K_+$')
left_axes.hist(chainK, bins = range(10), normed=True)

right_axes = plt.subplot(1, 2, 2)
features_per_object = np.sum(Z,axis=1)
right_axes.hist(features_per_object, bins = range(1,10))
right_axes.set_xlabel('(b) Features in an object')

plt.savefig('figures/kDistribution.png')

# Only the first four feature columns are used by the plots that follow.
Z = Z[:,0:4]


def make_ticklabels_invisible(fig):
    """Label every axes of ``fig`` and hide its tick labels.

    Each axes receives the text ``"ax<n>"`` (numbered from 1 in the order of
    ``fig.axes``) at position (0.5, 0.5), centred both ways, and all of its
    x- and y-tick labels are made invisible.
    """
    for number, axes in enumerate(fig.axes, 1):
        axes.text(0.5, 0.5, "ax%d" % number, va="center", ha="center")
        tick_labels = axes.get_xticklabels() + axes.get_yticklabels()
        for label in tick_labels:
            label.set_visible(False)

def _draw_feature_panels(features, assignments, images):
    """Fill the current figure with three rows of four panels.

    Top row: rows 0-3 of ``features`` as 6x6 images.  Middle row: rows 0-3 of
    ``assignments`` as single-row strips.  Bottom row: rows 0-3 of ``images``
    as 6x6 images.  Axes are created row by row, left to right.
    """
    grid = (13, 24)
    panel_columns = (0, 6, 12, 18)
    for idx, col in enumerate(panel_columns):
        plt.subplot2grid(grid, (0, col), colspan=6, rowspan=6)
        plt.pcolormesh(features[idx,:].reshape(6,6), cmap=plt.cm.gray)
    for idx, col in enumerate(panel_columns):
        plt.subplot2grid(grid, (6, col), colspan=6)
        plt.pcolormesh(assignments[idx,:][np.newaxis,], cmap=plt.cm.gray)
    for idx, col in enumerate(panel_columns):
        plt.subplot2grid(grid, (7, col), colspan=6, rowspan=6)
        plt.pcolormesh(images[idx,:].reshape(6,6), cmap=plt.cm.gray)


# Figure: true features, true assignments and observed data of the first
# four objects.
f = plt.figure(num=None, figsize=(12,6), dpi=80, facecolor='w', edgecolor='k')
_draw_feature_panels(A0, Z0, X0)
make_ticklabels_invisible(f)
plt.savefig('figures/Original.png')

# Feature estimate computed from the last Z sample and the chain means of
# the two standard deviations.
sigmaA = np.mean(chainSigmaA)
sigmaX = np.mean(chainSigmaX)
regularised_gram = np.dot(Z.T,Z)+(sigmaX**2/sigmaA**2)*np.eye(4)
A_post = np.dot(np.dot(np.linalg.inv(regularised_gram),Z.T),X0)

# Reconstruction of every object from its assignments and estimated features.
N = X0.shape[0]
D = X0.shape[1]
Xpost = np.zeros((N,D))
for i in range(N):
    Xpost[i,:] = np.dot(Z[i,:],A_post[0:4,])

# Figure: estimated features, sampled assignments and reconstructions of the
# first four objects.
f = plt.figure(num=None, figsize=(12,6), dpi=80, facecolor='w', edgecolor='k')
_draw_feature_panels(A_post, Z, Xpost)
make_ticklabels_invisible(f)
plt.savefig('figures/Detected.png')

# Figure: trace plots of the three sampled hyper-parameters.
plt.figure(num=None, figsize = (12,6), dpi=80, facecolor='w', edgecolor='k')
for position, chain, label in ((311, chainSigmaX, r'$\sigma_X$'),
                               (312, chainSigmaA, r'$\sigma_A$'),
                               (313, chainAlpha, r'$\alpha$')):
    plt.subplot(position)
    plt.plot(chain)
    plt.ylabel(label)
plt.savefig('figures/Trace.png')


Writing python_scripts/createPlots.py


In [134]:
%%file python_scripts/finalsimulation.py
from sampler import sampler
import numpy as np
import time
import os
# Run the sampler once on the simulated data with the settings below.
np.random.seed(1)

X = np.load('Data/SimulatedData.npy')
N, D = X.shape[0], X.shape[1]

# Starting values of the hyper-parameters.
sigmaX = 1.
sigmaA = 1.
alpha = 1.

# Sampler settings.
maxNew = 4
niter = 1000
BurnIn = 0

sampler(X=X, niter=niter, BurnIn=BurnIn, sigmaX=sigmaX, sigmaA=sigmaA,
        alpha=alpha, N=N, D=D, maxNew=maxNew)

Writing python_scripts/finalsimulation.py


In [135]:
%%file python_scripts/likelihoodcompare.py
from functions import sampleIBP, ll, ll0
import pandas as pd
import numpy as np
import time
import os
# Compare the mean run time of the two likelihood implementations (ll0 and
# ll) and write the result to latex_tables/llcomp.tex.
np.random.seed(1)

X=np.load('Data/SimulatedData.npy')
N=X.shape[0]
D=X.shape[1]
sigmaX=1.
sigmaA=1.
alpha=1.

Z,K = sampleIBP(alpha,N)

loops = 1000


def _meanRuntime(loglik, repeats):
    """Mean wall-clock seconds of one ``loglik`` evaluation, rounded to 7 decimals."""
    elapsed = np.zeros(repeats)
    for run in range(repeats):
        t0 = time.time()
        loglik(X, Z, sigmaX, sigmaA, K, D, N)
        t1 = time.time()
        elapsed[run] = t1-t0
    return round(np.mean(elapsed),7)


mtll0 = _meanRuntime(ll0, loops)
mtll = _meanRuntime(ll, loops)


times = np.array((mtll0,mtll))

columns = ['Time']
index = ['original ll function','Proposed ll function']

if not os.path.exists('latex_tables'):
    os.makedirs('latex_tables')
df = pd.DataFrame(times,columns=columns,index=index)
tab = df.to_latex()
# `with` closes the file even when the write fails.
with open("latex_tables/llcomp.tex", "w") as text_file:
    text_file.write(tab)



Writing python_scripts/likelihoodcompare.py


In [136]:
%%file python_scripts/test_code.py
from __future__ import division
import numpy.testing as npt
import numpy as np
import math
import scipy.stats as stats
from functions import sampleIBP, calcInverse, ll

# Shared fixtures for the tests below.
# The three arrays are loaded at import time from paths relative to the
# current working directory, so test collection fails with an IOError when
# the Data/ directory is not reachable from where py.test is started.
X=np.load('Data/SimulatedData.npy')
chainSigmaX=np.load("Data/chainSigmaX.npy")
chainZ=np.load("Data/chainZ.npy")
sigmaA=1.
sigmaX=1.
alpha=1.
N=100
# Placeholder only: Kplus is overwritten by the sampleIBP call below.
Kplus=4
D=36

np.random.seed(1)
Z, Kplus = sampleIBP(alpha, N)
#Z=np.zeros((N,100))


# For every row, draw a Poisson(alpha) count t and switch on the first t
# entries (the slice stops at the last column when t exceeds the width).
for i in range(N):
    t=stats.poisson.rvs(alpha)
    if t>0:
        Z[i,0:t]=1

# Z already has Kplus columns at this point, so this slice keeps all of them.
Z = Z[:,0:Kplus]
# Inverse that matches the fixture Z; used as the starting point for calcInverse.
M=np.linalg.inv(np.dot(Z.T,Z)+((sigmaX/sigmaA)**2)*np.identity(Kplus))

#test of calcinverse
def testcalcInverse():
    """The rank-one update must agree with a freshly computed inverse.

    calcInverse writes ``val`` into the matrix it is given, so it is run on a
    private copy of the shared fixture.  The module-level ``Z`` and ``M``
    therefore stay consistent for the other tests, whatever order they run in.
    """
    (i,k,val) = (7,1,1)
    Zwork = Z.copy()
    M1=calcInverse(Zwork,M,i,k,val)
    M2=np.linalg.inv(np.dot(Zwork.T,Zwork)+((sigmaX/sigmaA)**2)*np.identity(Kplus))
    npt.assert_almost_equal(M1,M2, decimal =2)

#testing that likelihoods are positive    
def testll1():
    """The likelihood (exponential of the log-likelihood) must be non-negative."""
    log_likelihood = ll(X, Z, sigmaX, sigmaA, Kplus, D, N)
    likelihood = np.exp(log_likelihood)
    assert likelihood >= 0
#Make sure that likelihood function gives zerodivision error when sigmaA is 0
def testll2():
    """A zero sigmaA must surface as ZeroDivisionError."""
    with npt.assert_raises(ZeroDivisionError):
        ll(X, Z, sigmaX, 0, Kplus, D, N)

#Make sure likelihood gives nan if sigmaA is negative
def testll3():
    """A negative sigmaA must give a NaN log-likelihood."""
    log_likelihood = ll(X, Z, sigmaX, -.5, Kplus, D, N)
    assert math.isnan(log_likelihood)

#Make sure likelihood gives nan if sigmaX is negative
def testll4():
    """A negative sigmaX must give a NaN log-likelihood."""
    log_likelihood = ll(X, Z, -0.5, sigmaA, Kplus, D, N)
    assert math.isnan(log_likelihood)

#test of convergence of code
def testconv1():
    """Mean of the sigmaX chain from sample 200 onwards must be within 0.05 of 0.5."""
    chain_mean = np.mean(chainSigmaX[200:])
    deviation = np.abs(chain_mean-0.5)
    assert deviation<=.05
    
#test that each object has at least one feature as we asserted that while simulating data
# First four feature columns of the last recorded sample.
Zfinal = chainZ[-1,:,0:4]
def testconv2():
    """Every object must own at least one of the first four features.

    The comparison is applied to each row sum before ``.all()``; the previous
    form reduced to a single bool first and then compared that bool with 1.
    """
    assert (np.sum(Zfinal,axis=1)>=1).all()


Writing python_scripts/test_code.py


In [137]:
! py.test

platform linux2 -- Python 2.7.9 -- py-1.4.25 -- pytest-2.6.3
collected 0 items / 1 errors 
[0m
_________________ ERROR collecting python_scripts/test_code.py _________________
python_scripts/test_code.py:8: in <module>
[1m    X=np.load('Data/SimulatedData.npy')[0m
../anaconda/lib/python2.7/site-packages/numpy/lib/npyio.py:369: in load
[1m    fid = open(file, "rb")[0m
[1m[31mE   IOError: [Errno 2] No such file or directory: 'Data/SimulatedData.npy'[0m
