In [1]:
import torch
import sys 
# Extend the module search path so later cells can `from priors import ...`
# (presumably priors.py lives in ../gtm/ — confirm). The path is relative to
# the kernel's working directory, so the notebook must be run from its own folder.
sys.path.append('../gtm/')

In [2]:
from priors import LogisticNormalPrior

# Parameter-recovery check for LogisticNormalPrior: simulate 50,000
# observations from a Logistic Multivariate Normal with known parameters,
# refit the prior on the simulated data, and compare the estimated lambda
# with the ground truth.

# Seed torch's global RNG so the ground-truth parameters and covariates (and
# therefore the printed comparison) are identical on every Restart & Run All.
# NOTE(review): assumes prior.simulate also draws from torch's global RNG — confirm.
torch.manual_seed(0)

# lambda captures covariate effects: one row per prevalence covariate, one
# column per topic. Subtracting the first column makes topic 0's effects
# exactly zero (presumably the reference topic — confirm against priors.py).
lambda_ = torch.rand((2, 20))
lambda_ = lambda_ - lambda_[:, 0][:, None]

# sigma is the covariance matrix; building it as A @ A.T makes it symmetric
# positive semi-definite by construction.
sqrt_sigma = torch.rand(20, 20)
sigma = torch.matmul(sqrt_sigma, sqrt_sigma.T)

# Two integer-valued covariates per observation, drawn from {0, ..., 9} and
# stored as floats.
M_prevalence_covariates = torch.randint(0, 10, (50000, 2), dtype=torch.float)

# Create the prior, simulate some data, and fit the model.
# NOTE(review): the DirichletPrior cell in this notebook spells the first
# keyword `prevalence_covariates_size` — confirm which spelling each class accepts.
prior = LogisticNormalPrior(
    prevalence_covariate_size=2,
    n_topics=20,
    prevalence_covariates_regularization=0,
    device='cpu',
)
posterior_theta = prior.simulate(M_prevalence_covariates, lambda_, sigma).cpu().numpy()
# update_parameters is fed NumPy arrays here, so the covariates are converted
# too. This rebinds the name from a tensor to an ndarray.
M_prevalence_covariates = M_prevalence_covariates.cpu().numpy()
prior.update_parameters(posterior_theta, M_prevalence_covariates)

# Check resulting fit: estimated lambda first, ground truth second.
print(prior.lambda_)
print(lambda_)

  from .autonotebook import tqdm as notebook_tqdm


tensor([[ 0.0000, -0.9037, -0.5332, -0.8825, -0.8637, -0.7785, -0.4709, -0.1674,
         -0.7406, -0.8811, -0.5406, -0.3580, -0.5166, -0.5767, -0.8178, -0.1439,
         -0.5788, -0.3842, -0.2018, -0.7684],
        [ 0.0000, -0.2408,  0.5843,  0.5117,  0.1158,  0.2692,  0.3812, -0.0208,
          0.3550, -0.1250,  0.0322,  0.3000, -0.2955,  0.5604,  0.0766, -0.0132,
          0.2247, -0.2889, -0.0411,  0.4861]])
tensor([[ 0.0000, -0.9053, -0.5349, -0.8838, -0.8641, -0.7789, -0.4707, -0.1671,
         -0.7398, -0.8829, -0.5399, -0.3582, -0.5175, -0.5751, -0.8160, -0.1411,
         -0.5797, -0.3826, -0.2021, -0.7692],
        [ 0.0000, -0.2395,  0.5846,  0.5115,  0.1153,  0.2669,  0.3807, -0.0216,
          0.3549, -0.1252,  0.0295,  0.2994, -0.2956,  0.5573,  0.0740, -0.0179,
          0.2241, -0.2924, -0.0427,  0.4868]])


In [7]:
# Sanity check: lambda_ was created with torch.rand((2, 20)), i.e. one row per
# prevalence covariate and one column per topic.
lambda_.shape

torch.Size([2, 20])

In [6]:
# Sanity check: sigma = sqrt_sigma @ sqrt_sigma.T with a 20x20 factor, so it
# should be square with one row/column per topic.
sigma.shape

torch.Size([20, 20])

In [8]:
# Sanity check: one row per simulated observation, one column per covariate.
# A plain tuple is displayed (not torch.Size) because the name was rebound to
# a NumPy array via .cpu().numpy() after simulation.
M_prevalence_covariates.shape

(50000, 2)

In [7]:
from priors import DirichletPrior

# Parameter-recovery check for DirichletPrior: simulate 50,000 observations
# from a Dirichlet with known covariate effects, refit the prior on the
# simulated data, and compare the estimated lambda with the ground truth.

# Seed torch's global RNG so the ground-truth parameters and covariates (and
# therefore the printed comparison) are identical on every Restart & Run All.
# NOTE(review): assumes prior.simulate also draws from torch's global RNG — confirm.
torch.manual_seed(1)

# lambda captures covariate effects: one row per prevalence covariate, one
# column per topic. Subtracting the first column makes topic 0's effects
# exactly zero (presumably the reference topic — confirm against priors.py).
lambda_ = torch.rand((2, 20))
lambda_ = lambda_ - lambda_[:, 0][:, None]

# Two integer-valued covariates per observation, drawn from {0, ..., 9} and
# stored as float32.
M_prevalence_covariates = torch.randint(0, 10, (50000, 2), dtype=torch.float32)

# Create the prior, simulate some data, and fit the model.
# NOTE(review): the LogisticNormalPrior cell in this notebook spells the first
# keyword `prevalence_covariate_size` — confirm which spelling each class accepts.
prior = DirichletPrior(
    prevalence_covariates_size=2,
    n_topics=20,
    alpha=0.1,
    device='cpu',
    prevalence_covariates_regularization=0,
    tol=0.00001,
)
posterior_theta = prior.simulate(M_prevalence_covariates, lambda_).cpu().numpy()
# update_parameters is fed NumPy arrays here, so the covariates are converted
# too. This rebinds the name from a tensor to an ndarray.
M_prevalence_covariates = M_prevalence_covariates.cpu().numpy()
prior.update_parameters(posterior_theta, M_prevalence_covariates)

# Check resulting fit: estimated lambda first (printed as a NumPy array in the
# recorded output), ground-truth tensor second.
print(prior.lambda_)
print(lambda_)

[[ 0.          0.41619313 -0.09090084  0.00737452  0.1474813  -0.04552612
  -0.18772738  0.11748969  0.01861168  0.11958308 -0.22930677  0.54891765
   0.42009583  0.72654384  0.3076732   0.24336933  0.40810952  0.10554393
   0.49134496  0.26717478]
 [ 0.          0.09853726 -0.06457754  0.34815425 -0.09287361  0.20699541
   0.67649996 -0.15705372  0.73222345 -0.1466109   0.50534725  0.44762042
  -0.09751104 -0.02209584 -0.07248931 -0.12899533  0.17573237  0.3124037
  -0.01199143  0.22656843]]
tensor([[ 0.0000,  0.4220, -0.1364,  0.0057,  0.1457, -0.0463, -0.1874,  0.1207,
          0.0153,  0.1218, -0.2325,  0.5436,  0.4210,  0.7257,  0.3072,  0.2447,
          0.4272,  0.1046,  0.4906,  0.2663],
        [ 0.0000,  0.0990, -0.0921,  0.3467, -0.0954,  0.2074,  0.7027, -0.1683,
          0.7257, -0.1526,  0.5109,  0.4427, -0.0985, -0.0244, -0.0745, -0.1303,
          0.1823,  0.3121, -0.0138,  0.2246]])
