In [1]:
import torch
import sys 
# Add the sibling `gtm` directory to the module search path so that later
# cells can do `from priors import ...`. The path is relative, so it is
# resolved against the kernel's current working directory.
sys.path.append('../gtm/')

In [2]:
from priors import LogisticNormalPrior

# Seed torch's global RNG so the torch.rand / torch.randint draws below are
# identical every time this cell is run (Restart & Run All reproducibility).
# NOTE(review): whether prior.simulate also draws from torch's global RNG is
# not visible from this notebook -- confirm in priors.py.
torch.manual_seed(0)

# Simulate 50,000 observations from a Logistic Multivariate Normal.
# lambda captures covariate effects: a (2, 20) tensor matching
# prevalence_covariate_size=2 and n_topics=20 passed to the prior below.
# Subtracting the first column from every column makes column 0 exactly
# zero, which matches the zero first column seen in the fitted prior.lambda_.
lambda_ = torch.rand((2, 20))
lambda_ = lambda_ - lambda_[:, :1]

# sigma is the covariance matrix. Building it as A @ A.T guarantees it is
# symmetric and positive semi-definite.
sqrt_sigma = torch.rand(20, 20)
sigma = sqrt_sigma @ sqrt_sigma.T

# Two covariates per observation, integer-valued in [0, 10) but stored as floats.
M_prevalence_covariates = torch.randint(0, 10, (50000, 2), dtype=torch.float)

# Create the prior, simulate some data, and fit the model.
prior = LogisticNormalPrior(
    prevalence_covariate_size=2,
    n_topics=20,
    prevalence_covariates_regularization=0,
    device='cpu',
)
posterior_theta = prior.simulate(M_prevalence_covariates, lambda_, sigma).cpu().numpy()
# update_parameters is given NumPy arrays here, so convert the covariates too.
M_prevalence_covariates = M_prevalence_covariates.cpu().numpy()
prior.update_parameters(posterior_theta, M_prevalence_covariates)

# Check resulting fit: the estimate is printed first, then the true lambda.
print(prior.lambda_)
print(lambda_)

  from .autonotebook import tqdm as notebook_tqdm


tensor([[ 0.0000,  0.0546, -0.2109, -0.2917, -0.2136, -0.2115,  0.5490,  0.4736,
          0.3502,  0.1898,  0.3846,  0.0085,  0.3960, -0.2153, -0.0978, -0.0215,
         -0.0391, -0.0710,  0.3257,  0.3533],
        [ 0.0000, -0.0102, -0.1632, -0.0407, -0.2846,  0.6248, -0.3030,  0.4427,
         -0.1658,  0.0640,  0.5974, -0.1886, -0.2301,  0.0812,  0.5434, -0.3029,
          0.3831,  0.3381, -0.3117,  0.2584]])
tensor([[ 0.0000,  0.0568, -0.2121, -0.2958, -0.2217, -0.2082,  0.5525,  0.4753,
          0.3550,  0.1921,  0.3854,  0.0113,  0.4001, -0.2160, -0.1009, -0.0213,
         -0.0406, -0.0668,  0.3297,  0.3547],
        [ 0.0000, -0.0116, -0.1736, -0.0468, -0.3033,  0.6223, -0.3065,  0.4423,
         -0.1686,  0.0634,  0.5973, -0.1941, -0.2328,  0.0804,  0.5478, -0.3106,
          0.3848,  0.3360, -0.3178,  0.2591]])


In [3]:
from priors import DirichletPrior

# Seed torch's global RNG so the torch.rand / torch.randint draws below are
# identical every time this cell is run (Restart & Run All reproducibility).
# NOTE(review): whether prior.simulate / update_parameters also draw from
# torch's global RNG is not visible from this notebook -- confirm in priors.py.
torch.manual_seed(0)

# Simulate 50,000 observations from a Dirichlet.
# lambda captures covariate effects: a (2, 20) tensor matching the two
# covariates and n_topics=20 passed to the prior below. Subtracting the first
# column from every column makes column 0 exactly zero.
lambda_ = torch.rand((2, 20))
lambda_ = lambda_ - lambda_[:, :1]

# Two covariates per observation, integer-valued in [0, 10) but stored as floats.
M_prevalence_covariates = torch.randint(0, 10, (50000, 2), dtype=torch.float32)

# Create the prior, simulate some data, and fit the model.
# NOTE(review): the keyword is spelled `prevalence_covariates_size` here but
# `prevalence_covariate_size` for LogisticNormalPrior in the previous cell --
# confirm both spellings against priors.py.
prior = DirichletPrior(
    prevalence_covariates_size=2,
    n_topics=20,
    alpha=0.1,
    device='cpu',
)
posterior_theta = prior.simulate(M_prevalence_covariates, lambda_).cpu().numpy()
# update_parameters is given NumPy arrays here, so convert the covariates too.
M_prevalence_covariates = M_prevalence_covariates.cpu().numpy()
prior.update_parameters(posterior_theta, M_prevalence_covariates, num_epochs=1000)

# Check resulting fit: the estimate is printed first, then the true lambda.
print(prior.lambda_)
print(lambda_)

[[ 0.         -0.10163552 -0.06662548 -0.5302729  -0.5390905  -0.19397649
  -0.34628376 -0.37523258 -0.33119085 -0.20138949 -0.13122478 -0.36949947
  -0.38264674 -0.23250103 -0.35249394 -0.41616222  0.07745466 -0.38039687
  -0.31699422 -0.5743764 ]
 [ 0.          0.07534995 -0.23296171 -0.09940197 -0.06067064 -0.28099138
  -0.3719383  -0.30336088 -0.39149857 -0.34973356 -0.3114789  -0.2623979
  -0.34812102 -0.37462288 -0.3237797  -0.26575115 -0.6822994  -0.13737576
  -0.34883595  0.00196885]]
tensor([[ 0.0000, -0.0764, -0.0788, -0.4943, -0.6514, -0.3633, -0.7124, -0.8294,
         -0.5434, -0.3313, -0.1291, -0.8340, -0.5385, -0.4368, -0.4672, -0.8232,
          0.0016, -0.4060, -0.7052, -0.7424],
        [ 0.0000,  0.0638, -0.2218, -0.0827, -0.1568, -0.4533, -0.8619, -0.5657,
         -0.7198, -0.4924, -0.2935, -0.5075, -0.4594, -0.7971, -0.4209, -0.4167,
         -0.6701, -0.1798, -0.8887, -0.1382]])
