
Check in a couple of clean-ups: added the identifiable flag, retired the center flag, and added a simple debug data generator for main effects only.
1 parent 57f0991 commit bda67b6fab8fa3a4219d5360651d9105e006a8c7 Liang Zhang committed Nov 6, 2015
72 src/R/model/multicontext_model_genData.R
@@ -1,16 +1,48 @@
### Copyright (c) 2011, Yahoo! Inc. All rights reserved.
### Copyrights licensed under the New BSD License. See the accompanying LICENSE file for terms.
-###
+###
### Author: Bee-Chung Chen
# Generate some normal data for testing
#
# All feature values (x_obs, x_src, x_dst, x_ctx) follow N(0,1)
-#
+#
# Visually checked 12/20, 2010
#
+
+# Generate data with just intercept + alpha_i + beta_j, without factors or cold-start models, for debugging purposes
+genMainEffectData <- function(
+ nSrcNodes, nDstNodes, nObs, intercept=1,
+ var_y, var_alpha, var_beta, binary.response=FALSE
+) {
+ alpha = rnorm(nSrcNodes, mean=0, sd=sqrt(var_alpha));
+ beta = rnorm(nDstNodes, mean=0, sd=sqrt(var_beta));
+ src.id = c(1:nSrcNodes, sample.int(nSrcNodes, nObs-nSrcNodes, replace=TRUE));
+ dst.id = c(sample.int(nDstNodes, nObs-nDstNodes, replace=TRUE), 1:nDstNodes);
+ output=list();
+ output$obs = data.frame(src.id=as.integer(src.id), dst.id=as.integer(dst.id));
+ pred.y = intercept + alpha[src.id] + beta[dst.id];
+ if(binary.response){
+ output$y.prob = 1/(1+exp(-pred.y));
+ output$obs$y = rbinom(n=nObs,size=1,prob=output$y.prob);
+ }else{
+ output$obs$y = pred.y + rnorm(nObs, mean=0, sd=sqrt(var_y));
+ }
+
+ # Placeholder for features, intercept only
+ x_obs = matrix(1, nrow=nObs, ncol=1);
+ x_src = matrix(1, nrow=nSrcNodes, ncol=2);
+ x_dst = matrix(1, nrow=nDstNodes, ncol=2);
+
+ output$alpha = alpha
+ output$beta = beta
+ output$intercept = intercept
+ output$feature = list(x_src=x_src, x_dst=x_dst, x_obs=x_obs);
+ output
+}
+
genNormalData <- function(
- nSrcNodes, nDstNodes, nObs,
+ nSrcNodes, nDstNodes, nObs,
nSrcContexts, nDstContexts, nEdgeContexts, nFactors, has.u, has.gamma, nLocalFactors,
b, g0, d0, h0=NULL, G=NULL, D=NULL, H=NULL, q=NULL, r=NULL,
var_y=NULL, var_alpha, var_beta, var_gamma=NULL, var_u=NULL, var_v=NULL, var_w=NULL,
@@ -22,7 +54,7 @@ genNormalData <- function(
frac.zeroFeatures=0, y.bias=0
){
if(!is.vector(b)) stop("b should be a vector");
-
+
nObsFeatures = length(b);
nSrcFeatures = nrow(g0);
nDstFeatures = nrow(d0);
@@ -50,9 +82,9 @@ genNormalData <- function(
if(nLocalFactors > 0){
if(nLocalFactors*nEdgeContexts != nFactors) stop("nLocalFactors*nEdgeContexts != nFactors");
}
-
+
if(nObs < nSrcNodes || nObs < nDstNodes) stop("nObs < nSrcNodes || nObs < nDstNodes");
-
+
x_obs = matrix(rnorm(nObs*nObsFeatures), nrow=nObs, ncol=nObsFeatures, dimnames=list(NULL, sprintf("x_obs_%03d", 1:nObsFeatures)));
x_src = matrix(rnorm(nSrcNodes*nSrcFeatures), nrow=nSrcNodes, ncol=nSrcFeatures, dimnames=list(NULL, sprintf("x_src_%03d", 1:nSrcFeatures)));
x_dst = matrix(rnorm(nDstNodes*nDstFeatures), nrow=nDstNodes, ncol=nDstFeatures, dimnames=list(NULL, sprintf("x_dst_%03d", 1:nDstFeatures)));
@@ -88,7 +120,7 @@ genNormalData <- function(
}else{
alpha = alpha + rnorm(nSrcNodes, mean=0, sd=sqrt(var_alpha));
}
-
+
beta = as.matrix(x_dst %*% d0);
beta_global = NULL;
if(nDstContexts > 1){
@@ -103,7 +135,7 @@ genNormalData <- function(
if(has.gamma){
gamma = drop(as.matrix(x_ctx %*% h0) + rnorm(nEdgeContexts, mean=0, sd=sqrt(var_gamma)));
}
-
+
if(has.u) u = as.matrix(x_src %*% G) + rnorm(nSrcNodes*nFactors, mean=0, sd=sqrt(var_u));
if(nFactors > 0) v = as.matrix(x_dst %*% D) + rnorm(nDstNodes*nFactors, mean=0, sd=sqrt(var_v));
if(nLocalFactors > 0){
@@ -112,7 +144,7 @@ genNormalData <- function(
}else{
if(nEdgeContexts > 0) w = as.matrix(x_ctx %*% H) + rnorm(nEdgeContexts*nFactors, mean=0, sd=sqrt(var_w));
}
-
+
src.id = c(1:nSrcNodes, sample.int(nSrcNodes, nObs-nSrcNodes, replace=TRUE));
dst.id = c(sample.int(nDstNodes, nObs-nDstNodes, replace=TRUE), 1:nDstNodes);
src.context = NULL;
@@ -127,9 +159,9 @@ genNormalData <- function(
output$obs$src.context = src.context;
output$obs$dst.context = dst.context;
output$obs$edge.context = edge.context;
-
+
output$feature = list(x_src=x_src, x_dst=x_dst, x_obs=x_obs, x_ctx=x_ctx);
-
+
output$factor = list(alpha=alpha, beta=beta);
if(nSrcContexts > 1) output$factor$alpha_global = alpha_global;
if(nDstContexts > 1) output$factor$beta_global = beta_global;
@@ -147,7 +179,7 @@ genNormalData <- function(
}
if(nSrcContexts > 1) param[["q"]] = q;
if(nDstContexts > 1) param[["r"]] = r;
-
+
param$var_y = var_y; param$var_alpha = var_alpha; param$var_beta = var_beta;
if(nSrcContexts > 1) param$var_alpha_global = var_alpha_global;
if(nDstContexts > 1) param$var_beta_global = var_beta_global;
@@ -157,17 +189,17 @@ genNormalData <- function(
param$var_v = var_v;
if(nEdgeContexts > 0) param$var_w = var_w;
}
-
+
output$param = param;
-
+
pred.y = y.bias + predict.y.from.factors(output$obs, output$factor, output$feature, output$param);
if(binary.response){
output$y.prob = 1/(1+exp(-pred.y));
output$obs$y = rbinom(n=nObs,size=1,prob=output$y.prob);
}else{
output$obs$y = pred.y + rnorm(nObs, mean=0, sd=sqrt(var_y));
}
-
+
if(sparse.matrices){
if(index.value.format) func = matrix.to.index.value
else func = function(x){ return(Matrix(x, sparse=TRUE)); };
@@ -177,13 +209,13 @@ genNormalData <- function(
if(!is.null(x_ctx)) x_ctx = func(x_ctx);
output$feature = list(x_src=x_src, x_dst=x_dst, x_obs=x_obs, x_ctx=x_ctx);
}
-
+
return(output);
}
# Visually checked 12/20, 2010
generate.GaussianData <- function(
- nSrcNodes, nDstNodes, nObs,
+ nSrcNodes, nDstNodes, nObs,
nSrcContexts, nDstContexts, nEdgeContexts, nFactors, has.gamma, has.u,
nObsFeatures, nSrcFeatures, nDstFeatures, nCtxFeatures=0, nLocalFactors=0,
b.sd=1, g0.sd=1, d0.sd=1, h0.sd=1, G.sd=1, D.sd=1, H.sd=1, q.sd=1, r.sd=1,
@@ -208,10 +240,10 @@ generate.GaussianData <- function(
q = NULL; r = NULL;
if(nSrcContexts > 1) q = rnorm(nSrcContexts, mean=q.mean, sd=q.sd);
if(nDstContexts > 1) r = rnorm(nDstContexts, mean=r.mean, sd=r.sd);
-
+
ans = genNormalData(
- nSrcNodes=nSrcNodes, nDstNodes=nDstNodes, nObs=nObs,
- nSrcContexts=nSrcContexts, nDstContexts=nDstContexts, nEdgeContexts=nEdgeContexts,
+ nSrcNodes=nSrcNodes, nDstNodes=nDstNodes, nObs=nObs,
+ nSrcContexts=nSrcContexts, nDstContexts=nDstContexts, nEdgeContexts=nEdgeContexts,
nFactors=nFactors, nLocalFactors=nLocalFactors, has.u=has.u, has.gamma=has.gamma,
b=b, g0=g0, d0=d0, h0=h0, G=G, D=D, H=H, q=q, r=r,
var_y=var_y, var_alpha=var_alpha, var_beta, var_gamma=var_gamma, var_u=var_u, var_v=var_v, var_w=var_w,
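
A minimal usage sketch of the new debug generator (not part of the commit), assuming src/R/model/multicontext_model_genData.R has been sourced. The generator draws y ~ Bernoulli(1/(1+exp(-(intercept + alpha_i + beta_j)))), so the observed positive rate should track the average of the simulated probabilities:

  d = genMainEffectData(
    nSrcNodes=3, nDstNodes=5, nObs=100000, intercept=-1,
    var_y=1, var_alpha=0.5, var_beta=0.5, binary.response=TRUE
  );
  mean(d$obs$y);    # empirical positive rate
  mean(d$y.prob);   # average of the true probabilities used to draw y
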
16 src/RLFM-ars-logistic/R/examples.R
@@ -13,9 +13,9 @@ source("src/R/model/multicontext_model_genData.R");
source("src/R/model/multicontext_model_utils.R");
set.seed(0);
d = generate.GaussianData(
- nSrcNodes=1003, nDstNodes=1003, nObs=100003,
+ nSrcNodes=10, nDstNodes=10, nObs=100003,
nSrcContexts=1, nDstContexts=1, nEdgeContexts=0, nFactors=3, has.gamma=FALSE, has.u=TRUE,
- nObsFeatures=13, nSrcFeatures=19, nDstFeatures=23,
+ nObsFeatures=0, nSrcFeatures=0, nDstFeatures=0,
b.sd=1, g0.sd=1, d0.sd=1, G.sd=1, D.sd=1,
var_y=0.1, var_alpha=0.5, var_beta=0.5, var_v=1, var_u=1,
has.intercept=TRUE, binary.response=TRUE,
@@ -83,8 +83,8 @@ data.test = indexTestData(
library(glmnet);
dyn.load("lib/c_funcs.so");
dyn.load("lib/arslogistic.so");
-source("src/R/c_funcs.R");
-source("src/R/util.R");
+#source("src/R/c_funcs.R");
+#source("src/R/util.R");
source("src/R/model/multicontext_model_EM.R");
source("src/R/model/multicontext_model_utils.R");
source("src/RLFM-ars-logistic/R/c_funcs.R");
@@ -93,8 +93,8 @@ source("src/RLFM-ars-logistic/R/fit.MCEM.logistic.R");
source("src/RLFM-ars-logistic/R/regression.R");
set.seed(1); # NOTE: set.seed doesn't work because the ARS code uses its own random number generator
ans = fit.ARS.logistic(
- nIter=10, # Number of EM iterations
- nSamples=10, nBurnin=20, # Number of samples and burnin drawn in each E-step: could be a vector of size nIter.
+ nIter=20, # Number of EM iterations
+ nSamples=100, nBurnin=20, # Number of samples and burnin drawn in each E-step: could be a vector of size nIter.
data.train=data.train, # Training data = list(obs, feature)
nFactors=3, # Number of factors (i.e., number of dimensions of u)
init.model=NULL, # Initial model = list(factor, param). Set to NULL to use the default.
@@ -110,7 +110,7 @@ ans = fit.ARS.logistic(
use.glmnet=TRUE,
# ARS parameters
ars_ninit=3, ars_qcent=c(5.0,50.0,95.0), # number of initial points and the quantiles of the initial points
- ars_xl=-5, ars_xu=5, # lower bound and upper bound of ARS samples
+ ars_xl=-3, ars_xu=3, # lower bound and upper bound of ARS samples
ars_alpha=0.5,
- center=FALSE # center the random effects at every iteration of the ARS?
+ identifiable=F
);
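
For callers that previously passed center=..., that flag is now derived inside the fitter from identifiable (see fit.MCEM.logistic.R below). A rough, self-contained sketch of the mapping applied at initialization, with hypothetical inputs:

  identifiable = TRUE;                 # what a caller would now pass instead of center=
  v = matrix(rnorm(10*3), 10, 3);      # hypothetical factor initialization
  if (identifiable) {
    v = abs(v);                        # force a positive initialization of v
    beta.int = FALSE;                  # no intercept in the beta prior
    center = TRUE;                     # random effects are centered each iteration
  } else {
    center = FALSE;                    # no centering when the model is left unidentifiable
  }
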
74 src/RLFM-ars-logistic/R/fit.MCEM.logistic.R
@@ -30,8 +30,8 @@ fit.ARS.logistic <- function(
nFactors, # Number of factors (i.e., number of dimensions of u)
init.model=NULL, # Initial model = list(factor, param). Set to NULL to use the default.
doMstep=TRUE,
- # initialization parameters
- var_alpha=1, var_beta=1, var_v=1, var_u=1,
+ # initialization parameters, should be very small
+ var_alpha=0.1, var_beta=0.1, var_v=0.05, var_u=0.05,
# others
out.level=0, # out.level=1: Save the parameter values out.dir/est.highestCDL and out.dir/est.last
out.dir=NULL, # out.level=2: Save the parameter values of each iteration i to out.dir/est.i
@@ -50,8 +50,8 @@ fit.ARS.logistic <- function(
fit.ars.alpha=FALSE, # whether we want to fit ars_alpha in the M-step
fit.regression=TRUE, # do we want to update the regression parameters?
beta.int=FALSE, # do we want to put the intercept in the beta prior?
- center=TRUE, # center the random effects at every iteration of the ARS?
- main.effects=FALSE, # only fix the main effects. Leave u and v set to 0.
+ main.effects=FALSE, # only fit the main effects. Leave u and v set to 0.
+ identifiable=TRUE, # Whether we want the model to be identifiable or not
... # Additional parameters passing into the regression functions (e.g., bayesglm)
){
obs=NULL; feature=NULL;
@@ -123,8 +123,8 @@ fit.MCEM.logistic <- function(
fit.ars.alpha=F, # whether we want to fit ars_alpha in the M-step
fit.regression=T, # do we want to update the regression parameters?
beta.int=F, # do we want to put the intercept in the beta prior?
- center=T, # center the random effects at every iteration of the ARS?
- main.effects=F, # only fix the main effects. Leave u and v set to 0.
+ main.effects=F, # only fit the main effects. Leave u and v set to 0.
+ identifiable=TRUE, # Whether we want the model to be identifiable or not
... # Additional parameters passing into the regression functions (e.g., bayesglm)
){
user = as.integer(user);
@@ -133,13 +133,19 @@ fit.MCEM.logistic <- function(
#if(beta.int && center) stop("Cannot learn intercept in random effects when centered")
if (main.effects)
- {
+ {
u = matrix(0, dim(u)[1], dim(u)[2])
v = matrix(0, dim(v)[1], dim(v)[2])
- }
+ }
- # Make sure initialization of v are all positive
- v = abs(v);
+ if (identifiable) {
+ # Make sure initialization of v are all positive
+ v = abs(v);
+ beta.int = FALSE;
+ center = TRUE;
+ } else {
+ center = FALSE;
+ }
if(beta.int && length(d0) != dim(z)[2] + 1) d0 = c(0, d0)
@@ -154,8 +160,7 @@ fit.MCEM.logistic <- function(
nFactors = ncol(u);
LL = rep(NA, nIter+1); # LL records the logLikelihood of each iteration
-# bestLL = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha, beta.int, debug, use.C.EStep);
- bestLL = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha, beta.int=F, debug, use.C.EStep);
+ bestLL = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha, beta.int, debug, use.C.EStep);
LL[1] = bestLL;
if (length(var_u)==1) var_u = rep(var_u,nFactors);
@@ -207,19 +212,20 @@ fit.MCEM.logistic <- function(
ars_XI_alpha = rep(xi,nUsers);
ars_XI_beta = rep(xi,nItems);
ars_XI_u = rep(xi,nUsers*nFactors);
- # Initialize ars_XI_v as positive
- xi = rep(0,ars_ninit);
- for(i in 1:ars_ninit){
- xi[i] <- (i + 1.0)*(ars_xu)/(ars_ninit + 1.0);
- if(xi[i] >= ars_xu) xi[i]=xi[i] - .1;
- if(xi[i] <= 0) xi[i] = xi[i] + .1;
+ if (identifiable) {
+ # Initialize ars_XI_v as positive
+ xi = rep(0,ars_ninit);
+ for(i in 1:ars_ninit){
+ xi[i] <- i*(ars_xu)/(ars_ninit + 1.0);
+ if(xi[i] >= ars_xu) xi[i]=xi[i] - .1;
+ if(xi[i] <= 0) xi[i] = xi[i] + .1;
+ }
}
ars_XI_v = rep(xi,nItems*nFactors);
begin.time = proc.time();
for(iter in 1:nIter){
- # gc(verbose=T);
if(verbose >= 2){
cat("---------------------------------------------------------\n",
@@ -270,8 +276,7 @@ fit.MCEM.logistic <- function(
time.used = proc.time() - b.time;
time.used.1 = time.used;
if(verbose >= 2){
- # ll = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha, beta.int, debug, use.C.EStep);
- ll = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha=ars_alpha, beta.int=F, debug=debug, use.C=use.C.EStep);
+ ll = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha=ars_alpha, beta.int, debug=debug, use.C=use.C.EStep);
cat("end E-STEP (logLikelihood = ",ll," + constant, used ",time.used[3]," sec)\n",
"start M-STEP\n",sep="");
}
@@ -289,7 +294,6 @@ fit.MCEM.logistic <- function(
d0old <- d0; var_betaold <- var_beta;
Dold <- D; var_vold <- var_v;
if(doMstep==1){
- # gc(verbose=T);
mc_m = MC_MStep_logistic_arscid(
user, item, y, x, b, w, z, o,
alpha=alpha, alpha.sumvar=mc_e$alpha.sumvar, beta=beta, beta.sumvar=mc_e$beta.sumvar,
@@ -324,17 +328,18 @@ fit.MCEM.logistic <- function(
D = matrix(0, nrow=ncol(z), ncol=nFactors)
var_u = rep(1,nFactors)
var_v = rep(1,nFactors)
- }
+ }
+ }
+ if (identifiable) {
+ # Order G, D, u and v
+ ind = order(var_v);
+ var_u = var_u[ind];
+ var_v = var_v[ind];
+ G = G[,ind];
+ D = D[,ind];
+ u = u[,ind];
+ v = v[,ind];
}
- # Order G, D, u and v
- ind = order(var_v);
- var_u = var_u[ind];
- var_v = var_v[ind];
- G = G[,ind];
- D = D[,ind];
- u = u[,ind];
- v = v[,ind];
-
if(verbose > 0){
r <- dim(D)[2]
cat("var_alpha=",var_alpha,"\n");
@@ -343,7 +348,6 @@ fit.MCEM.logistic <- function(
cat("var_v=",var_v,"\n");
cat("b=",b,"\n");
if(beta.int) cat("mean(beta)=",mean(beta),"\n")
- #cat("Alpha for logistic spline = ",ars_alpha,"\n");
cat("bdiff =",max(abs(b-bold)/(abs(bold) + delta1)),"\n")
cat("g0diff=",max(abs(g0 - g0old)/(abs(g0old) + delta1)),"\n")
cat("d0diff=",max(abs(d0 - d0old)/(abs(d0old) + delta1)),"\n")
@@ -357,9 +361,7 @@ fit.MCEM.logistic <- function(
o2 = x %*% b + o; ep = exp(o2)/(1+exp(o2))
cat("E(P(click)) = ", mean(ep), "\n")
}
- #LL[iter+1] = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha, beta.int, debug, use.C.EStep);
-
- LL[iter+1] = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha, beta.int=F, debug, use.C.EStep);
+ LL[iter+1] = logLikelihood.logistic(user, item, y, x, w, z, alpha, beta, u, v, b, g0, G, d0, D, var_alpha, var_beta, var_u, var_v, ars_alpha, beta.int, debug, use.C.EStep);
time.used = proc.time() - b.time;
time.used.2 = time.used;
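
When identifiable=TRUE, the factor columns are put into a canonical order (ascending var_v) after each M-step so runs are comparable across restarts. A toy illustration of that column permutation, with made-up values:

  var_v = c(0.9, 0.2, 0.5);
  v     = matrix(rnorm(6), nrow=2, ncol=3);  # 2 items x 3 factors
  ind   = order(var_v);                      # 2 3 1
  var_v = var_v[ind];                        # 0.2 0.5 0.9
  v     = v[, ind];                          # columns permuted consistently
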
60 src/RLFM-ars-logistic/R/regression.R
@@ -423,7 +423,7 @@ MC_MStep_logistic_arscid <- function(
user, item, y, x, b, w, z,
o, alpha, alpha.sumvar, beta, beta.sumvar, u, u.sumvar, v, v.sumvar,
debug=0, lm=F, use.glmnet=F, fit.ars.alpha=F, fit.regression=T,
- beta.int=T, main.effects=F,...
+ beta.int=T, main.effects=F, ...
){
nObs = length(y);
nUsers = length(alpha);
@@ -434,15 +434,15 @@ MC_MStep_logistic_arscid <- function(
# find ars alpha ...
if (fit.ars.alpha )
- {
+ {
ars_alpha = estalpha(y,b,o);
output$ars_alpha = ars_alpha;
- }
+ }
else
- {
+ {
output$ars_alpha = 0.5;
ars_alpha = 0.5;
- }
+ }
# determine b and var_y
if(beta.int == F){
@@ -461,19 +461,17 @@ MC_MStep_logistic_arscid <- function(
a0 = fit0$glmnet.fit$a0[lambdaind];
coef = fit0$glmnet.fit$beta[,lambdaind];
fit = list();
- fit$coefficients = as.vector(c(a0,coef));
+ fit$coefficients = as.vector(c(a0,coef));
} else {
- #fit = glm(y ~ x -1, family=binomial(link = "logit"),offset=o, model=F);
# fit as covariate
nobs = length(x)
x = cbind(matrix(x,length(x),1), rep(0,length(x)))
-
+
fit = bayesglm(y ~ x - 1, family=binomial(link="logit"), offset=o ,model=F, prior.scale = 5);
-
+
fit$coef = fit$coef[1]; fit$coefficients=fit$coefficients[1]; x = matrix(x[,1],nobs,1)
}
}
- #if(length(fit$coef) != ncol(x)) stop("length(fit$coef) != ncol(x)");
output$b = fit$coefficients;
} else {
#fit in random effects heirarchy inside of beta ... but still center for computation ...
@@ -484,7 +482,6 @@ MC_MStep_logistic_arscid <- function(
{
stop("Currently fit.ars.alpha=T only works for ncol(x)==1");
}
- #cat("use.glmnet=",use.glmnet,"\n");
cat("fit regression=",fit.regression,"\n")
cat("intercept in beta prior =",beta.int,"\n")
@@ -502,15 +499,14 @@ MC_MStep_logistic_arscid <- function(
# determin d0 and var_beta ( and b if in heirarchy )
if (beta.int) z2 = cbind(1,z) else z2=z
- if (use.glmnet==F ) {
+ if (use.glmnet==F) {
fit = fit.forMainEffect.bayesglm(beta, z2, lm=lm,...);
} else {
fit = fit.forMainEffect.glmnet(beta, z2, ...);
}
output$d0 = fit$coef;
output$var_beta = (sum(fit$rss) + beta.sumvar) / nItems;
- #output$var_beta = 1;
if(!main.effects){
# determin G and var_u
@@ -522,13 +518,9 @@ MC_MStep_logistic_arscid <- function(
fit = fit.forFactors.glmnet(u, w, ...);
}
output$G = fit$coef;
-
- #cat("var_u.rss=",fit$rss,"\n");
- #cat("u.sumvar=",u.sumvar,"\n");
- #output$var_u = (fit$rss + u.sumvar)/nUsers;
- #output$var_u = (fit$rss + u.sumvar) / (nUsers * nFactors);
- #output$var_u = 1;
- output$var_u = rep(1,nFactors);
+
+ # Regardless of whether identifiable==T, var_u is always fixed at 1
+ output$var_u = rep(1,nFactors);
# determin D and var_v
if (use.glmnet==F)
@@ -539,41 +531,25 @@ MC_MStep_logistic_arscid <- function(
fit = fit.forFactors.glmnet(v, z, ...);
}
output$D = fit$coef;
- cat("var_v.rss=",fit$rss,"\n");
+ cat("var_v.rss=",fit$rss,"\n");
cat("v.sumvar=",v.sumvar,"\n");
- output$var_v = (fit$rss + v.sumvar)/nItems;
- #output$var_v = (fit$rss + v.sumvar) / (nItems * nFactors);
- #output$var_v = rep(1,nFactors);
-
+ output$var_v = (fit$rss + v.sumvar)/nItems;
}
} else
{
output$var_alpha = (sum(alpha^2) + alpha.sumvar) / nUsers;
- #output$var_beta = 1;
output$var_beta = (sum(beta^2) + beta.sumvar) / nItems;
- #if(!main.effects) output$var_u = (sum(u^2) + u.sumvar) / (nUsers * nFactors);
- #output$var_v = rep(1,nFactors);
- #if (!main.effects) output$var_u = (apply(u^2,2,sum)+u.sumvar)/nItems;
output$var_u = rep(1,nFactors);
if (!main.effects) output$var_v = (apply(v^2,2,sum)+v.sumvar)/nItems;
- #if(!main.effects) output$var_v = (sum(v^2) + v.sumvar) / (nItems * nFactors);
}
if( beta.int == T && fit.regression == F)
- {
- #z = cbind(rep(1,length(beta)),rep(0,length(beta)))
- #fit = fit.forMainEffect.bayesglm(beta, z, lm=lm,...);
+ {
output$d0 = rep(0,dim(z)[2]); output$d0[1] = mean(beta);
output$var_alpha = (sum(alpha^2) + alpha.sumvar) / nUsers;
- #output$var_beta = 1;
output$var_beta = (sum((beta - mean(beta))^2) + beta.sumvar) / nItems;
- #if(!main.effects) output$var_u = (sum(u^2) + u.sumvar) / (nUsers * nFactors);
- #output$var_u = 1;
- #if(!main.effects) output$var_v = (sum(v^2) + v.sumvar) / (nItems * nFactors);
- #if (!main.effects) output$var_u = (apply(u^2,2,sum)+u.sumvar)/nItems;
- #output$var_v = rep(1,nFactors);
- output$var_u = rep(1,nFactors);
- if (!main.effects) output$var_v = (apply(v^2,2,sum)+v.sumvar)/nItems;
- }
+ output$var_u = rep(1,nFactors);
+ if (!main.effects) output$var_v = (apply(v^2,2,sum)+v.sumvar)/nItems;
+ }
return(output);
}
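
In this M-step var_u stays pinned at 1 per factor while var_v is re-estimated from the factor regression. A small worked example of the per-factor update used above, with hypothetical numbers:

  nItems   = 100;
  rss      = 42;    # residual sum of squares from regressing v[,k] on z (hypothetical)
  v.sumvar = 8;     # summed posterior variances of v[,k] from the E-step (hypothetical)
  (rss + v.sumvar) / nItems;   # var_v for factor k = 0.5
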
48 src/RLFM-ars-logistic/R/simple_examples.R
@@ -0,0 +1,48 @@
+library(Matrix);
+source("src/R/c_funcs.R");
+source("src/R/util.R");
+source("src/R/model/util.R");
+source("src/R/model/multicontext_model_genData.R");
+source("src/R/model/multicontext_model_utils.R");
+set.seed(0);
+
+d = genMainEffectData(
+ nSrcNodes=3, nDstNodes=5, nObs=1000000, intercept=-1,
+ var_y=1, var_alpha=0.5, var_beta=0.5, binary.response=TRUE
+)
+
+dyn.load("lib/c_funcs.so");
+dyn.load("lib/arslogistic.so");
+source("src/R/model/multicontext_model_EM.R");
+source("src/R/model/multicontext_model_utils.R");
+source("src/RLFM-ars-logistic/R/c_funcs.R");
+source("src/RLFM-ars-logistic/R/util.R");
+source("src/RLFM-ars-logistic/R/fit.MCEM.logistic.R");
+source("src/RLFM-ars-logistic/R/regression.R");
+set.seed(1); # NOTE: set.seed doesn't work because the ARS code uses its own random number generator
+library(arm)
+
+ans = fit.ARS.logistic(
+ nIter=3, # Number of EM iterations
+ nSamples=100, nBurnin=20, # Number of samples and burnin drawn in each E-step: could be a vector of size nIter.
+ data.train=list(obs=d$obs,feature=d$feature), # Training data = list(obs, feature)
+ nFactors=3, # Number of factors (i.e., number of dimensions of u)
+ init.model=NULL, # Initial model = list(factor, param). Set to NULL to use the default.
+ # initialization parameters
+ var_alpha=0.1, var_beta=0.1, var_v=0.1, var_u=0.1,
+ # others
+ out.level=2, # out.level=1: Save the parameter values out.dir/est.highestCDL and out.dir/est.last
+ # out.level=2: Save the parameter values of each iteration i to out.dir/est.i
+ out.dir="./fit-ARS-logistic-example",
+ out.append=TRUE,
+ debug=10, # Set to 0 to disable internal sanity checking; Set to 10 for most detailed sanity checking
+ verbose=10, # Set to 0 to disable console output; Set to 10 to print everything to the console
+ use.glmnet=FALSE,
+ # ARS parameters
+ ars_ninit=3, ars_qcent=c(5.0,50.0,95.0), # number of initial points and the quantiles of the initial points
+ ars_xl=-3, ars_xu=3, # lower bound and upper bound of ARS samples
+ ars_alpha=0.5,
+ main.effects=TRUE,
+ fit.regression=FALSE,
+ identifiable=FALSE
+);
6 src/RLFM-ars-logistic/R/util.R
@@ -47,8 +47,8 @@ check.input.logistic <- function(
if(!is.vector(b)) stop("b should be a vector");
if(!is.vector(g0) && !is.matrix(g0)) stop("g0 should be a vector");
if(!is.vector(d0) && !is.matrix(d0)) stop("d0 should be a vector");
- if(!is.matrix(G)) stop("G should be a matrix");
- if(!is.matrix(D)) stop("D should be a matrix");
+ if(!is.matrix(G)) stop("G should be a matrix");
+ if(!is.matrix(D)) stop("D should be a matrix");
if(!is.vector(y)) stop("y should be a vector");
if(!is.vector(user)) stop("user should be a vector");
if(!is.vector(item)) stop("item should be a vector");
@@ -85,7 +85,7 @@ check.input.logistic <- function(
stopIfAnyNull=list("obs$y"=y,"obs$dst.id"=item,"feature$x_dst"=z,"param$D"=D,"param$var_v"=var_v),
check.NA=check.NA
);
-
+
if(version == 1){
if(!length(var_alpha) == 1) stop("var_alpha should have length 1");
if(!length(var_beta) == 1) stop("var_beta should have length 1");
