In [1]:
import torch
import sys 
# Add the sibling `gtm` directory to the import path so that later cells can
# import modules from it directly (presumably where `priors` lives — TODO confirm).
# The path is relative, so it resolves against the kernel's working directory.
sys.path.append('../gtm/')

In [2]:
from priors import LogisticNormalPrior

# Seed torch's global RNG so the simulated parameters and covariates are the
# same on every "Restart Kernel -> Run All".
# NOTE(review): if prior.simulate draws from another RNG (e.g. NumPy), seed
# that one as well — TODO confirm against priors.py.
torch.manual_seed(0)

# Simulate 50,000 observations from a Logistic Multivariate Normal.
# lambda_ captures covariate effects: one row per prevalence covariate (2),
# one column per topic (20).
lambda_ = torch.rand((2, 20))
# Subtract the first column from every column so that the first topic's
# effects are exactly zero (it serves as the reference column).
lambda_ = lambda_ - lambda_[:, 0][:, None]
# sigma is the covariance matrix; building it as A @ A.T makes it symmetric
# positive semi-definite by construction.
sqrt_sigma = torch.rand(20, 20)
sigma = torch.matmul(sqrt_sigma, sqrt_sigma.T)
# Two integer-valued covariates per observation, drawn from {0, ..., 9} and
# stored as floats.
M_prevalence_covariates = torch.randint(0, 10, (50000, 2), dtype=torch.float)

# Create the prior, simulate some data, and fit the model.
prior = LogisticNormalPrior(
    prevalence_covariate_size=2,
    n_topics=20,
    prevalence_covariates_regularization=0,
    device='cpu',
)
posterior_theta = prior.simulate(M_prevalence_covariates, lambda_, sigma).cpu().numpy()
# update_parameters is fed NumPy arrays here, so the covariates are converted
# too (the name is rebound from a tensor to an ndarray).
M_prevalence_covariates = M_prevalence_covariates.cpu().numpy()
prior.update_parameters(posterior_theta, M_prevalence_covariates)

# Check resulting fit: the estimate is printed first, the ground truth second.
print(prior.lambda_)
print(lambda_)

  from .autonotebook import tqdm as notebook_tqdm


tensor([[ 0.0000, -0.5883, -0.0691, -0.7169, -0.6580, -0.4289, -0.0048, -0.6477,
         -0.4112, -0.7009, -0.5507, -0.0154, -0.5065,  0.0020, -0.7585, -0.5440,
         -0.1713, -0.3676, -0.0053, -0.8080],
        [ 0.0000, -0.2771, -0.2818,  0.3852, -0.1307, -0.1265,  0.6091,  0.4783,
         -0.1915, -0.2216,  0.2979, -0.0129,  0.1821,  0.4526,  0.0889,  0.5959,
          0.1679,  0.0432,  0.1862,  0.3444]])
tensor([[ 0.0000, -0.6009, -0.0721, -0.7213, -0.6694, -0.4294, -0.0056, -0.6484,
         -0.4187, -0.7154, -0.5495, -0.0163, -0.5118, -0.0017, -0.7644, -0.5449,
         -0.1749, -0.3691, -0.0052, -0.8101],
        [ 0.0000, -0.2893, -0.2804,  0.3874, -0.1375, -0.1291,  0.6113,  0.4792,
         -0.1906, -0.2377,  0.2991, -0.0109,  0.1855,  0.4565,  0.0886,  0.5970,
          0.1693,  0.0436,  0.1832,  0.3442]])


In [7]:
# Sanity check: lambda_ is created with torch.rand((2, 20)), i.e. one row per
# prevalence covariate and one column per topic.
lambda_.shape

torch.Size([2, 20])

In [6]:
# Sanity check: sigma is the product of a 20x20 matrix with its transpose,
# so it should be square with one row/column per topic.
sigma.shape

torch.Size([20, 20])

In [8]:
# Sanity check: one row per simulated observation (50,000) and one column per
# prevalence covariate (2).
M_prevalence_covariates.shape

(50000, 2)

In [3]:
from priors import DirichletPrior

# Seed torch's global RNG so the simulated parameters and covariates are the
# same on every "Restart Kernel -> Run All".
# NOTE(review): if prior.simulate or the optimizer inside update_parameters
# draws from another RNG (e.g. NumPy), seed that one as well — TODO confirm.
torch.manual_seed(0)

# Simulate 50,000 observations from a Dirichlet.
# lambda_ captures covariate effects: one row per prevalence covariate (2),
# one column per topic (20).
lambda_ = torch.rand((2, 20))
# Subtract the first column from every column so that the first topic's
# effects are exactly zero (it serves as the reference column).
lambda_ = lambda_ - lambda_[:, 0][:, None]
# Two integer-valued covariates per observation, drawn from {0, ..., 9} and
# stored as floats.
M_prevalence_covariates = torch.randint(0, 10, (50000, 2), dtype=torch.float32)

# Create the prior, simulate some data, and fit the model.
# NOTE(review): the keyword here is `prevalence_covariates_size`, whereas the
# LogisticNormalPrior cell passes `prevalence_covariate_size` — confirm both
# spellings match the constructors in priors.py.
prior = DirichletPrior(
    prevalence_covariates_size=2,
    n_topics=20,
    alpha=0.1,
    device='cpu',
)
posterior_theta = prior.simulate(M_prevalence_covariates, lambda_).cpu().numpy()
# update_parameters is fed NumPy arrays here, so the covariates are converted
# too (the name is rebound from a tensor to an ndarray).
M_prevalence_covariates = M_prevalence_covariates.cpu().numpy()
prior.update_parameters(posterior_theta, M_prevalence_covariates, num_epochs=1000)

# Check resulting fit: the estimate is printed first, the ground truth second.
print(prior.lambda_)
print(lambda_)

[[ 0.         -0.10163552 -0.06662548 -0.5302729  -0.5390905  -0.19397649
  -0.34628376 -0.37523258 -0.33119085 -0.20138949 -0.13122478 -0.36949947
  -0.38264674 -0.23250103 -0.35249394 -0.41616222  0.07745466 -0.38039687
  -0.31699422 -0.5743764 ]
 [ 0.          0.07534995 -0.23296171 -0.09940197 -0.06067064 -0.28099138
  -0.3719383  -0.30336088 -0.39149857 -0.34973356 -0.3114789  -0.2623979
  -0.34812102 -0.37462288 -0.3237797  -0.26575115 -0.6822994  -0.13737576
  -0.34883595  0.00196885]]
tensor([[ 0.0000, -0.0764, -0.0788, -0.4943, -0.6514, -0.3633, -0.7124, -0.8294,
         -0.5434, -0.3313, -0.1291, -0.8340, -0.5385, -0.4368, -0.4672, -0.8232,
          0.0016, -0.4060, -0.7052, -0.7424],
        [ 0.0000,  0.0638, -0.2218, -0.0827, -0.1568, -0.4533, -0.8619, -0.5657,
         -0.7198, -0.4924, -0.2935, -0.5075, -0.4594, -0.7971, -0.4209, -0.4167,
         -0.6701, -0.1798, -0.8887, -0.1382]])
