In [1]:
import numpy as np
import numpy.random as rn
import matplotlib.pyplot as plt
import seaborn as sns

import scipy.stats as st

import sys
from path import Path
sys.path.append('..')
from apf.models.bpmf import BPMF

  import pandas.util.testing as tm
Using numpy backend.


In [2]:
# Synthetic data: a low-rank Poisson count matrix, its binarized version, and
# a shared random "missing" mask.
# Every draw below comes from NumPy's global RNG, so for a fixed seed the
# generated data depends on the ORDER of the rn.* calls -- do not reorder them.
seed = 222
rn.seed(seed)

n_samps = 100   # number of rows    (index I)
n_feats = 200   # number of columns (index J)
n_comps = 10    # number of latent components used to generate the data (index K)

eps = 0.1       # shape hyperparameter shared by all Gamma draws below

# b is drawn as Gamma(shape=eps, scale=1/eps); it enters the scale of
# Theta_IK as 1 / (eps * b).
b = rn.gamma(eps, 1./eps)
Theta_IK = rn.gamma(eps, 1./(eps * b), size=(n_samps, n_comps))
Phi_KJ = rn.dirichlet(np.ones(n_feats), size=n_comps)  # each row sums to 1
# Lambda_K does not enter Mu_IJ; the draw is kept because it advances the
# global RNG state consumed by the Poisson and mask draws that follow.
Lambda_K = rn.gamma(eps, 1./eps, size=n_comps)
Mu_IJ = Theta_IK.dot(Phi_KJ)                    # Poisson rate of every entry

Y_IJ = rn.poisson(Mu_IJ)                        # count data
B_IJ = (Y_IJ > 0).astype(int)                   # binarized data
mask_IJ = rn.binomial(1, 0.1, size=Y_IJ.shape)  # randomly make 10% missing (1 = missing)

# Both masked arrays are built from the same mask_IJ, so they hide exactly
# the same entries.
count_data = np.ma.MaskedArray(Y_IJ, mask_IJ.astype(bool))  # create masked count data
binary_data = np.ma.MaskedArray(B_IJ, mask_IJ.astype(bool)) # create masked binary data

In [3]:
# Fit BPMF to the masked count data, then score it with the pointwise
# predictive density (PPD) on the unmasked (training) and masked (test) entries.
K = 15
n_rows, n_cols = count_data.shape[:2]
model = BPMF(n_samps=n_rows, n_feats=n_cols, n_comps=K, binary=False, n_threads=3)

# Burn-in: these iterations run before any sample contributes to prob_IJ.
burnin = 1000
model.fit(count_data, n_itns=burnin, initialize=True, verbose=0)

n_epochs = 100  # number of reconstructions averaged into prob_IJ
n_itns = 50     # Gibbs iterations run before each reconstruction

# Running sum (later the mean) of the Poisson pmf of every true count under
# each epoch's reconstruction; evaluated on all entries, masked or not.
prob_IJ = np.zeros(count_data.shape)
for epoch in range(n_epochs):
    # initialize=False -- presumably continues from the current sampler state
    # rather than restarting; confirm against BPMF.fit.
    model.fit(count_data, n_itns=n_itns, initialize=False, verbose=0)

    pred_IJ = model.reconstruct()  # used as the Poisson rate of each entry
    prob_IJ += st.poisson.pmf(count_data.data, pred_IJ)
prob_IJ /= n_epochs

# Masked entries were hidden from the fit above and act as the test set.
mask = count_data.mask
test_data = count_data.data[mask]
train_data = count_data.data[~mask]

# PPD here is the geometric mean of the per-entry averaged probabilities.
test_ppd = np.exp(np.log(prob_IJ[mask]).mean())
train_ppd = np.exp(np.log(prob_IJ[~mask]).mean())

print(f'Pointwise predictive density on training data: {train_ppd}')
print(f'Pointwise predictive density on test data: {test_ppd}')

Pointwise predictive density on training data: 0.27378371462821355
Pointwise predictive density on test data: 0.22698889775620879


In [4]:
# Fit BPMF to the masked binary data, then score it with the pointwise
# predictive density (PPD) on the unmasked (training) and masked (test) entries.
K = 15
n_rows, n_cols = binary_data.shape[:2]
model = BPMF(n_samps=n_rows, n_feats=n_cols, n_comps=K, binary=True, n_threads=3)

# Burn-in: these iterations run before any sample contributes to prob_IJ.
burnin = 1000
model.fit(binary_data, n_itns=burnin, initialize=True, verbose=0)

n_epochs = 100  # number of reconstructions averaged into prob_IJ
n_itns = 50     # Gibbs iterations run before each reconstruction

# Running sum (later the mean) of the Bernoulli pmf of every true bit under
# each epoch's prediction; evaluated on all entries, masked or not.
prob_IJ = np.zeros(binary_data.shape)
for epoch in range(n_epochs):
    # initialize=False -- presumably continues from the current sampler state
    # rather than restarting; confirm against BPMF.fit.
    model.fit(binary_data, n_itns=n_itns, initialize=False, verbose=0)

    # -expm1(-r) == 1 - exp(-r): probability that a Poisson(r) count is > 0,
    # computed in a form that stays accurate for small r.
    pred_IJ = -np.expm1(-model.reconstruct())
    prob_IJ += st.bernoulli.pmf(binary_data.data, pred_IJ)
prob_IJ /= n_epochs

# Masked entries were hidden from the fit above and act as the test set.
mask = binary_data.mask
test_data = binary_data.data[mask]
train_data = binary_data.data[~mask]

# PPD here is the geometric mean of the per-entry averaged probabilities.
test_ppd = np.exp(np.log(prob_IJ[mask]).mean())
train_ppd = np.exp(np.log(prob_IJ[~mask]).mean())

print(f'Pointwise predictive density on training data: {train_ppd}')
print(f'Pointwise predictive density on test data: {test_ppd}')

Pointwise predictive density on training data: 0.7734989381395044
Pointwise predictive density on test data: 0.6320747500751928
