## [Bayesian Factorization Machines with Stan and R](https://nyhackr.blob.core.windows.net/presentations/A_Common_Model_Separated_by_Two_Disciplines-Adam_Lauretig.pdf)

### [Mastering Shiny](https://mastering-shiny.org/basic-intro.html)

In [None]:
#shiny::runApp('./shine')

In [None]:
library(MASS)
library(Matrix)
library(trialr)

In [None]:
library(rstan)
# Set the mc.cores option to the number of cores reported by
# parallel::detectCores(); rstan reads this option as its default core count.
options(mc.cores = parallel::detectCores())
# Per the rstan documentation, auto_write = TRUE caches the compiled Stan
# program on disk so an unchanged model does not have to be recompiled.
rstan_options(auto_write = TRUE)

### Generating Simulation Data

In [None]:
# Fix the RNG seed so the random draws in the simulation are reproducible.
set.seed(111)

In [None]:
# Number of levels in the first grouping factor, and their labels
# "i1", "i2", ..., "i<N>".
N <- 100
group_1 <- paste0("i", seq_len(N))

In [None]:
group_1

In [None]:
# Number of levels in the second grouping factor, and their labels
# "j1", "j2", ..., "j<J>".
J <- 20
group_2 <- paste0("j", seq_len(J))

In [None]:
group_2

In [None]:
# Dimension of the latent factor vectors: used as the size of the LKJ
# correlation matrices and as the length of the zero mean vectors passed to
# mvrnorm() further down.
K <- 5

In [None]:
# Full crossing of the two label sets: one row per (group_1, group_2) pair.
predictors <- expand.grid(
  group_1 = group_1,
  group_2 = group_2
)

In [None]:
predictors

In [None]:
# Sparse indicator (dummy-coded) design matrix for both grouping factors,
# built without an intercept column.
X_mat <- sparse.model.matrix(
  ~ factor(group_1) + factor(group_2) - 1,
  data = predictors
)

In [None]:
# Two-column numeric matrix holding the factor level codes of group_1 and
# group_2 for every row of `predictors`.
predictors_as_numeric <- cbind(
  as.numeric(factor(predictors[, 1])),
  as.numeric(factor(predictors[, 2]))
)

In [None]:
# One coefficient per design-matrix column, drawn from Normal(0, sd = 2) and
# stored as a single-column matrix.
betas <- matrix(rnorm(n = ncol(X_mat), mean = 0, sd = 2))

In [None]:
# Linear (main-effect) part of the response: design matrix times coefficients.
linear_predictors <- X_mat %*% betas

### [LKJ](https://yingqijing.medium.com/lkj-correlation-distribution-in-stan-29927b69e9be)

#### If eta > 1, the correlation values in the sampled correlation matrices are centered around 0. Higher eta values indicate weaker correlations (the matrices converge to the identity correlation matrix).

![](https://miro.medium.com/max/1400/1*v2LSgSMjYwCjWYpXtk437g.webp)

In [None]:
# Draw a single K x K correlation matrix from the LKJ distribution
# (eta = 0.9); used as the covariance of the group_1 latent factors.
gamma_omega <- rlkjcorr(
  n = 1,
  K = K,
  eta = 0.9
)

In [None]:
# Draw a single K x K correlation matrix from the LKJ distribution
# (eta = 0.5); used as the covariance of the group_2 latent factors.
delta_omega <- rlkjcorr(
  n = 1,
  K = K,
  eta = 0.5
)

In [None]:
# N draws of a K-dimensional zero-mean normal vector with covariance
# gamma_omega; row g holds the latent factors of the g-th group_1 level.
gammas <- mvrnorm(
  n = N,
  mu = rep(0, K),
  Sigma = gamma_omega
)

In [None]:
# J draws of a K-dimensional zero-mean normal vector with covariance
# delta_omega; row g holds the latent factors of the g-th group_2 level.
deltas <- mvrnorm(
  n = J,
  mu = rep(0, K),
  Sigma = delta_omega
)

In [None]:
# Preallocate a one-column matrix (all NA) with one row per observation; it is
# filled with the latent interaction terms afterwards.
factor_terms <- matrix(
  NA,
  nrow = nrow(linear_predictors),
  ncol = 1
)

In [None]:
dim(factor_terms)

In [None]:
# Latent interaction term for every (group_1, group_2) row of `predictors`:
# the inner product of that row's gamma and delta factor vectors.
# Labels look like "i12" / "j3"; dropping the one-letter prefix yields the
# row index into `gammas` / `deltas`.
g1 <- as.numeric(substring(as.character(predictors[[1]]), 2))
g2 <- as.numeric(substring(as.character(predictors[[2]]), 2))

# Vectorised replacement for the per-row loop (which also iterated over
# 1:nrow(), unsafe when there are zero rows): multiply the matching rows
# elementwise and sum across the K columns. drop = FALSE keeps both operands
# as matrices even when only a single row is selected.
factor_terms[, 1] <- rowSums(
  gammas[g1, , drop = FALSE] * deltas[g2, , drop = FALSE]
)

In [None]:
# Simulated response: main effects + latent interaction + Gaussian noise
# with standard deviation 0.1.
y <- linear_predictors +
  factor_terms +
  rnorm(n = nrow(linear_predictors), mean = 0, sd = 0.1)

In [None]:
# Named list handed to Stan as the model's data.
# NOTE(review): the simulation draws betas with sd 2 and noise with sd 0.1,
# while beta_sigma = 0.05 and y_sigma = 0.02 are passed here -- confirm how
# the Stan program uses these two values.
data_list <- list(
  N = N,                      # number of group_1 levels
  J = J,                      # number of group_2 levels
  K = K,                      # latent factor dimension
  X = predictors_as_numeric,  # numeric level codes, one row per observation
  y = y[, 1],                 # response as a plain vector
  beta_sigma = 0.05,
  y_sigma = 0.02
)

In [None]:
# Compile the Stan program for the factorization model.
model <- stan_model("../Stan/Models/factorization.stan")

In [None]:
# Drop any fit left over from a previous run before sampling again. The guard
# avoids the "object 'fit' not found" warning that a bare remove(fit) raises
# on a fresh session.
if (exists("fit", inherits = FALSE)) {
  rm(fit)
}

# Draw posterior samples: 4 chains of 1000 iterations each, the first 500 of
# which are warmup, with no thinning. adapt_delta is set to 0.95.
fit <- sampling(
  object = model,
  data = data_list,
  init = "random",
  control = list(adapt_delta = 0.95),
  chains = 4,
  iter = 1000,
  warmup = 500,
  thin = 1,
  verbose = TRUE
)