Permalink
Browse files

chech in the code

  • Loading branch information...
1 parent e0944eb commit 36ab28804167e6207a65a4ffb2cea12104bc08dd Bee-Chung Chen committed Oct 19, 2011
Showing with 22,833 additions and 0 deletions.
  1. +31 −0 LICENSE
  2. +17 −0 Makefile
  3. +1 −0 Makevars
  4. +244 −0 src/C/factor_model_MC_EStep.c
  5. +445 −0 src/C/factor_model_multicontext.c
  6. +1,113 −0 src/C/factor_model_util.c
  7. +298 −0 src/C/factor_model_util.h
  8. +270 −0 src/C/factor_model_util2.cpp
  9. +121 −0 src/C/hierarchical.c
  10. +45 −0 src/C/hierarchical.h
  11. +93 −0 src/C/multithread.cpp
  12. +47 −0 src/C/multithread.hpp
  13. +126 −0 src/C/pagerank.c
  14. +1,213 −0 src/C/util.c
  15. +218 −0 src/C/util.h
  16. +449 −0 src/C/utils.hpp
  17. +984 −0 src/R/c_funcs.R
  18. +118 −0 src/R/examples/hierarchical-examples.R
  19. +269 −0 src/R/examples/multicontext_model.R
  20. +26 −0 src/R/examples/optim.R
  21. +112 −0 src/R/examples/optim.c
  22. +70 −0 src/R/model/GLMNet.R
  23. +68 −0 src/R/model/Notation-multicontext.txt
  24. +56 −0 src/R/model/RandomForest.R
  25. +429 −0 src/R/model/factor_model_MCEM.R
  26. +148 −0 src/R/model/factor_model_MCEM_MStep.R
  27. +193 −0 src/R/model/factor_model_genData.R
  28. +605 −0 src/R/model/factor_model_utils.R
  29. +122 −0 src/R/model/hierarchical_genData.R
  30. +656 −0 src/R/model/hierarchical_utils.R
  31. +710 −0 src/R/model/hierarchical_utils.debug.R
  32. +730 −0 src/R/model/multicontext_model_EM.R
  33. +379 −0 src/R/model/multicontext_model_MStep.R
  34. +225 −0 src/R/model/multicontext_model_genData.R
  35. +965 −0 src/R/model/multicontext_model_utils.R
  36. +179 −0 src/R/model/pagerank.R
  37. +640 −0 src/R/model/util.R
  38. +478 −0 src/R/util.R
  39. +8 −0 src/hierarchical/C/Makefile
  40. +2 −0 src/hierarchical/C/Makevars
  41. +621 −0 src/hierarchical/C/Rinterface.cpp
  42. +2,252 −0 src/hierarchical/C/betabinomial.cpp
  43. +354 −0 src/hierarchical/C/cube.cpp
  44. +254 −0 src/hierarchical/C/cube.hpp
  45. +212 −0 src/hierarchical/C/dag.c
  46. +83 −0 src/hierarchical/C/dag.h
  47. +81 −0 src/hierarchical/C/dag.hpp
  48. +462 −0 src/hierarchical/C/model.h
  49. +118 −0 src/hierarchical/C/tree-smoothing.cpp
  50. +58 −0 src/hierarchical/C/tree-smoothing.h
  51. +211 −0 src/hierarchical/C/util.cpp
  52. +252 −0 src/hierarchical/C/util.h
  53. +796 −0 src/hierarchical/R/betabinomial.R
  54. +573 −0 src/hierarchical/R/c_funcs.R
  55. +118 −0 src/hierarchical/R/utils.R
  56. +752 −0 src/multi-app/C/EM.cpp
  57. +177 −0 src/multi-app/C/R_interface.cpp
  58. +129 −0 src/multi-app/C/definition.hpp
  59. +9 −0 src/multi-app/Makefile
  60. +1 −0 src/multi-app/Makevars
  61. +84 −0 src/multi-app/Notation.txt
  62. +354 −0 src/multi-app/R/example/fitting.R
  63. +439 −0 src/multi-app/R/model/fit-EM.R
  64. +136 −0 src/multi-app/R/model/generate-data.R
  65. +545 −0 src/multi-app/R/model/util.R
  66. +843 −0 src/multi-app/R/utils.R
  67. +16 −0 src/multi-app/ReadMe.txt
View
31 LICENSE
@@ -0,0 +1,31 @@
+Copyright (c) 2011, Yahoo! Inc. All rights reserved.
+
+Redistribution and use of this software in source and binary forms,
+with or without modification, are permitted provided that the following
+conditions are met:
+
+* Redistributions of source code must retain the above
+ copyright notice, this list of conditions and the
+ following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the
+ following disclaimer in the documentation and/or other
+ materials provided with the distribution.
+
+* Neither the name of Yahoo! Inc. nor the names of its
+ contributors may be used to endorse or promote products
+ derived from this software without specific prior
+ written permission of Yahoo! Inc.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
View
@@ -0,0 +1,17 @@
+
+all: lib c_funcs
+
+pagerank: src/C/pagerank.c src/C/util.h
+ R CMD SHLIB src/C/pagerank.c -o lib/pagerank.so
+
+factor: src/C/util.h src/C/factor_model_util.h src/C/factor_model_MC_EStep.c src/C/util.c src/C/factor_model_util.c
+ R CMD SHLIB src/C/util.c src/C/factor_model_util.c src/C/factor_model_MC_EStep.c -o lib/factor.so
+
+c_funcs: src/C/util.h src/C/factor_model_util.h src/C/pagerank.c src/C/factor_model_MC_EStep.c src/C/util.c src/C/factor_model_util.c src/C/factor_model_util2.cpp src/C/hierarchical.c src/C/hierarchical.h src/C/factor_model_multicontext.c
+ R CMD SHLIB src/C/util.c src/C/factor_model_util.c src/C/factor_model_MC_EStep.c src/C/pagerank.c src/C/hierarchical.c src/C/factor_model_multicontext.c src/C/factor_model_util2.cpp -o lib/c_funcs.so
+
+clean:
+ rm -f src/C/*.o src/C/*.so lib/*.so
+
+lib:
+ mkdir lib
View
@@ -0,0 +1 @@
+PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) -Wall
@@ -0,0 +1,244 @@
+/*
+ Copyright (c) 2011, Yahoo! Inc. All rights reserved.
+ Copyrights licensed under the New BSD License. See the accompanying LICENSE file for terms.
+
+ Author: Bee-Chung Chen
+*/
+
+#include <R.h>
+#include <Rmath.h>
+#include <R_ext/Lapack.h>
+#include <stdio.h>
+#include <time.h>
+#include "util.h"
+#include "factor_model_util.h"
+
+// ----------------------------------------------------------------------------
+// MCEM_EStep
+// ----------------------------------------------------------------------------
+// Notation: fErr_{ij} = y_{ij} - (alpha_i + beta_j + v_i' v_j)
+//
+// {alpha,beta,v,fErr}_mean are the Monte-Carlo means of alpha, beta, ...
+// {alpha,beta,v,fErr}_sumvar are the sums of the Monte-Carlo variances over all alpha's, ...
+//
+// if outputFactorVar == 1, then
+// {alpha,beta,v}_outputVar will contain the Monte-Carlo variance (cov matrix) for each individual user
+// otherwise (outputFactorVar == 0), {alpha,beta,v}_outputVar will not be changed
+//
+// nVar_{y,alpha,...} specifies the length of input var_{y,alpha,...}
+//
+// SET nVar_{alpha,beta,...} = 0 to fix the factor values (i.e., prior variance = 0)
+// ----------------------------------------------------------------------------
+void MCEM_EStep(
+ // INPUT (initial factor values) & OUTPUT (Monte Carlo mean of factor values)
+ double* alpha_mean/*nUsers x 1*/, double* beta_mean/*nUsers x 1*/,
+ double* v_mean/*nUsers x nFactors*/,
+ // OUTPUT
+ double* alpha_sumvar/*1x1*/, double* alpha_outputVar/*nUsers x 1*/,
+ double* beta_sumvar/*1x1*/, double* beta_outputVar/*nUsers x 1*/,
+ double* v_sumvar/*1x1*/, double* v_outputVar/*nUsers x nFactors x nFactors*/,
+ double* fErr_mean/*nObs x 1*/, double* fErr_sumvar/*1x1*/,
+ double* y_pred_square/*nObs x 1: E[(predicted y)^2] for logistic*/,
+ // INPUT
+ const int* nSamples, const int* nBurnIn,
+ const int* fromIndex/*nObs x 1*/, const int* toIndex/*nObs x 1*/,
+ const double* y/*nObs x 1*/, const double* xb/*nObs x 1*/,
+ const double* g0x_user/*nUsers x 1*/, const double* d0x_user/*nUsers x 1*/,
+ const double* Gx_user/*nUsers x nFactors*/,
+ const double* var_y, const double* var_alpha, const double* var_beta, const double* var_v,
+ const int* dim /*7 x 1*/, const int* nDim /*must be 7*/,
+ // dim = {nObs, nUsers, nFactors,
+ // nVar_y, nVar_alpha, nVar_beta, nVar_v}
+ const int* outputFactorVar,
+ // OTHER
+ const int* debug, const int* verbose
+){
+ int user_i, user_j;
+ const int one = 1;
+ int verbose_nextLevel = (*verbose) - 1;
+ clock_t t_begin_in=0;
+
+ if(*verbose > 0) Rprintf("START MCEM_EStep.C\n");
+
+ if(*nDim != 7) error("nDim should be 7: nDim=%d)",*nDim);
+ const int* nObs = dim+0; const int* nUsers = dim+1; const int* nFactors = dim+2;
+ const int* nVar_y = dim+3; const int* nVar_alpha = dim+4; const int* nVar_beta = dim+5;
+ const int* nVar_v = dim+6;
+
+ if(*verbose > 5){
+ Rprintf(" nObs=%d, nUsers=%d, nFactors=%d\n",*nObs,*nUsers,*nFactors);
+ Rprintf(" nVar_y=%d, nVar_alpha=%d, nVar_beta=%d, nVar_v=%d\n",*nVar_y,*nVar_alpha,*nVar_beta,*nVar_v);
+ }
+
+ // Allocate space for sum and sum-of-squares (or sum of products of a pair of factors)
+ double *alpha_sum = (double*)Calloc(*nUsers, double);
+ double *beta_sum = (double*)Calloc(*nUsers, double);
+ double *v_sum = (double*)Calloc((*nUsers)*(*nFactors), double);
+ double *fErr_sum = (double*)Calloc(*nObs, double);
+ double *fErr_sos = (double*)Calloc(*nObs, double);
+ double *rest = (double*)Calloc(*nObs, double);
+
+ for(int k=0; k<*nObs; k++) y_pred_square[k] = 0;
+
+ double *alpha_sos=NULL, *beta_sos=NULL, *v_sos=NULL;
+ if((*outputFactorVar) == 0){
+ alpha_sos = (double*)Calloc(*nUsers, double);
+ beta_sos = (double*)Calloc(*nUsers, double);
+ v_sos = (double*)Calloc((*nUsers)*(*nFactors), double);
+ }else if((*outputFactorVar) == 1){
+ // use alpha_outputVar, beta_outputVar and v_outputVar
+ for(int k=0; k<*nUsers; k++) alpha_outputVar[k] = 0;
+ for(int k=0; k<*nUsers; k++) beta_outputVar[k] = 0;
+ for(int k=0; k<(*nUsers)*(*nFactors)*(*nFactors); k++) v_outputVar[k] = 0;
+ }else error("outputFactorVar = %d should be only 0 or 1", *outputFactorVar);
+
+ // Allocate space for the observation indices
+ int *author_obsIndex = (int*)Calloc(*nObs, int); int *author_oiStart = (int*)Calloc(*nUsers,int); int *author_oiNum = (int*)Calloc(*nUsers,int);
+ int *voter_obsIndex = (int*)Calloc(*nObs, int); int *voter_oiStart = (int*)Calloc(*nUsers,int); int *voter_oiNum = (int*)Calloc(*nUsers,int);
+
+
+ // Create Observation indices for authors and voters
+ generateObsIndex(author_obsIndex, author_oiStart, author_oiNum, toIndex, nObs, nUsers, debug);
+ generateObsIndex( voter_obsIndex, voter_oiStart, voter_oiNum, fromIndex, nObs, nUsers, debug);
+
+ // Initialize factors
+ // use the memory space of the output to store the current alpha, beta and v
+ double *alpha = alpha_mean; double *beta = beta_mean; double *v = v_mean;
+
+ for(int sampleNo=0; sampleNo<(*nSamples)+(*nBurnIn); sampleNo++){
+
+ if(*verbose > 0){
+ t_begin_in = clock();
+ Rprintf("SAMPLE %3d:",sampleNo);
+ if(sampleNo < *nBurnIn) Rprintf(" Burn-in");
+ else Rprintf(" Ready ");
+ }
+ //----------------------------------------
+ // Draw samples
+ //----------------------------------------
+ if(*nVar_alpha > 0){
+ // Compute y - xb - beta - vv
+ for(int k=0; k<*nObs; k++){
+ user_i = toIndex[k] - 1; user_j = fromIndex[k] - 1;
+ if(*debug > 0){ CHK_C_INDEX(user_i, *nUsers); CHK_C_INDEX(user_j, *nUsers);}
+ double vv = 0; for(int f=0; f<*nFactors; f++) vv += v[C_MAT(user_i,f,*nUsers)] * v[C_MAT(user_j,f,*nUsers)];
+ rest[k] = y[k] - xb[k] - beta[user_j] - vv;
+ }
+ // print_vector("rest: ", rest, *nObs);
+
+ // Sample alpha
+ gaussianPosterior_mainEffect(alpha, NULL, NULL, &one, toIndex, rest, g0x_user, NULL, var_y, var_alpha, nObs, nUsers, nVar_y, nVar_alpha, debug);
+ }
+ if(*nVar_beta > 0){
+ // Compute y - xb - alpha - vv
+ for(int k=0; k<*nObs; k++){
+ user_i = toIndex[k] - 1; user_j = fromIndex[k] - 1;
+ if(*debug > 0){ CHK_C_INDEX(user_i, *nUsers); CHK_C_INDEX(user_j, *nUsers);}
+ double vv = 0; for(int f=0; f<*nFactors; f++) vv += v[C_MAT(user_i,f,*nUsers)] * v[C_MAT(user_j,f,*nUsers)];
+ rest[k] = y[k] - xb[k] - alpha[user_i] - vv;
+ }
+ // Sample beta
+ gaussianPosterior_mainEffect(beta, NULL, NULL, &one, fromIndex, rest, d0x_user, NULL, var_y, var_beta, nObs, nUsers, nVar_y, nVar_beta, debug);
+ }
+
+ if(*verbose > 0){
+ double secUsed = ((double)(clock() - t_begin_in)) / CLOCKS_PER_SEC;
+ Rprintf(" draw main: %.1f sec", secUsed);
+ t_begin_in = clock();
+ }
+
+ // Sample v
+ if(*nVar_v > 0){
+ // Compute rest = y - xb - alpha - beta
+ for(int k=0; k<*nObs; k++){
+ user_i = toIndex[k] - 1; user_j = fromIndex[k] - 1;
+ if(*debug > 0){ CHK_C_INDEX(user_i, *nUsers); CHK_C_INDEX(user_j, *nUsers);}
+ rest[k] = y[k] - xb[k] - alpha[user_i] - beta[user_j];
+ }
+
+ gaussianPosterior_SelfInteraction(
+ v, NULL, NULL, &one, fromIndex, toIndex, rest, Gx_user,
+ var_y, var_v, nObs, nUsers, nFactors, nVar_y, nVar_v,
+ author_obsIndex, author_oiStart, author_oiNum, voter_obsIndex, voter_oiStart, voter_oiNum, debug
+ );
+ }
+
+ if(*verbose > 0){
+ double secUsed = ((double)(clock() - t_begin_in)) / CLOCKS_PER_SEC;
+ Rprintf(" + factor: %.1f sec", secUsed);
+ t_begin_in = clock();
+ }
+
+ // DEBUG CODE:
+ // print_matrix(" s = ", s, *nUsers, *nTopics);
+
+ if(sampleNo < *nBurnIn){
+ if(*verbose > 0) Rprintf("\n");
+ continue;
+ }
+
+ //----------------------------------------
+ // Update statistics & output
+ //----------------------------------------
+ // update {alpha, beta, v}_sum
+ for(int k=0; k<(*nUsers); k++) alpha_sum[k] += alpha[k];
+ for(int k=0; k<(*nUsers); k++) beta_sum[k] += beta[k];
+ for(int k=0; k<(*nUsers)*(*nFactors); k++) v_sum[k] += v[k];
+
+ // update fErr_sum, fErr_sos
+ for(int k=0; k<*nObs; k++){
+ user_i = toIndex[k] - 1; user_j = fromIndex[k] - 1;
+ if(*debug > 0){ CHK_C_INDEX(user_i, *nUsers); CHK_C_INDEX(user_j, *nUsers);}
+ double vv = 0; for(int f=0; f<*nFactors; f++) vv += v[C_MAT(user_i,f,*nUsers)] * v[C_MAT(user_j,f,*nUsers)];
+ double o = y[k] - alpha[user_i] - beta[user_j] - vv;
+ double pred_y = alpha[user_i] + beta[user_j] + vv + xb[k];
+ y_pred_square[k] += pred_y*pred_y;
+ fErr_sum[k] += o;
+ fErr_sos[k] += o*o;
+ }
+
+ // update sample variances of alpha, beta and v
+ if((*outputFactorVar) == 0){
+ for(int k=0; k<(*nUsers); k++) alpha_sos[k] += alpha[k]*alpha[k];
+ for(int k=0; k<(*nUsers); k++) beta_sos[k] += beta[k]* beta[k];
+ for(int k=0; k<(*nUsers)*(*nFactors); k++) v_sos[k] += v[k]* v[k];
+ }else if((*outputFactorVar) == 1){
+ for(int k=0; k<*nUsers; k++) alpha_outputVar[k] += alpha[k]*alpha[k];
+ for(int k=0; k<*nUsers; k++) beta_outputVar[k] += beta[k]* beta[k];
+ for(int k=0; k<(*nUsers); k++)
+ // ONLY store the lower triangle
+ for(int f1=0; f1<*nFactors; f1++) for(int f2=0; f2<=f1; f2++)
+ v_outputVar[C_3DA(k,f1,f2,*nUsers,*nFactors)]
+ += v[C_MAT(k,f1,*nUsers)] * v[C_MAT(k,f2,*nUsers)];
+ }else error("outputFactorVar = %d should be only 0 or 1", *outputFactorVar);
+
+ if(*verbose > 0){
+ double secUsed = ((double)(clock() - t_begin_in)) / CLOCKS_PER_SEC;
+ Rprintf(" + update: %.1f sec\n", secUsed);
+ }
+ }
+
+ if((*outputFactorVar) == 0){
+ computeMeanSumvar(alpha_mean, alpha_sumvar, alpha_sum, alpha_sos, *nUsers, *nSamples);
+ computeMeanSumvar( beta_mean, beta_sumvar, beta_sum, beta_sos, *nUsers, *nSamples);
+ computeMeanSumvar(v_mean, v_sumvar, v_sum, v_sos, (*nUsers)*(*nFactors), *nSamples);
+ }else if((*outputFactorVar) == 1){
+ computeMeanVar(alpha_mean, alpha_sumvar, alpha_outputVar, alpha_sum, *nUsers, 1, *nSamples);
+ computeMeanVar( beta_mean, beta_sumvar, beta_outputVar, beta_sum, *nUsers, 1, *nSamples);
+ computeMeanVar(v_mean, v_sumvar, v_outputVar, v_sum, (*nUsers), (*nFactors), *nSamples);
+ }else error("outputFactorVar = %d should be only 0 or 1", *outputFactorVar);
+
+ computeMeanSumvar(fErr_mean, fErr_sumvar, fErr_sum, fErr_sos, *nObs, *nSamples);
+ for(int k=0; k<*nObs; k++) y_pred_square[k] /= (*nSamples);
+
+ // Free the allocated space
+ // The R Free() would only free a pointer if it is NOT NULL.
+ if((*outputFactorVar) == 0){ Free(alpha_sos); Free(beta_sos); Free(v_sos); }
+
+ Free(alpha_sum); Free(beta_sum); Free(v_sum);
+ Free(fErr_sum); Free(fErr_sos); Free(rest);
+ Free(author_obsIndex); Free(author_oiStart); Free(author_oiNum);
+ Free( voter_obsIndex); Free( voter_oiStart); Free( voter_oiNum);
+
+ if(*verbose > 0) Rprintf("END MCEM_EStep.C\n");
+}
Oops, something went wrong.

0 comments on commit 36ab288

Please sign in to comment.