Permalink
Browse files

Add ARS logistic response sampling

  • Loading branch information...
1 parent 75565ea commit 167c55f47589c89fb0b5130e2c639b4d41f36b9a Bee-Chung Chen committed Feb 29, 2012
View
@@ -0,0 +1,17 @@
+
+# Copyright (c) 2012, Yahoo! Inc. All rights reserved.
+# Copyrights licensed under the New BSD License. See the accompanying LICENSE file for terms.
+# Author: Deepak Agarwal
+
+all:c_funcs
+
+srcfiles = arms.c arsspline.c
+
+
+
+c_funcs:
+ R CMD SHLIB $(srcfiles) -o c_funcs.so
+
+.PHONY : clean
+clean:
+ rm *.so *.o
View
@@ -0,0 +1,134 @@
+###
+### Copyright (c) 2012, Yahoo! Inc. All rights reserved.
+### Copyrights licensed under the New BSD License. See the accompanying LICENSE file for terms.
+### Author: Deepak Agarwal
+###
+
+get.sp.des <- function(X){
+ #getting a sparse representation of design matrix X: obsid fval nfobs
+ ncov <- dim(X)[2];nobs <- dim(X)[1]
+ obsid <- fval <- nfobs <- NULL
+ for(i in 1:ncov){
+ f <- X[,i]
+ wk <- c(1:nobs)[f != 0]
+ obsid <- c(obsid,wk); fval <- c(fval,f[wk]);nfobs <- c(nfobs,length(wk))
+ }
+ ans <- list(obsid,fval,nfobs)
+ names(ans) <- c("obsid","fval","nfobs")
+ ans
+}
+
+
+get.dense.des <- function(feat){
+ nobs <- max(feat[,2]) #assumes there are no obs with all features absent.
+ ncov <- max(feat[,1]) #assumes there are no features that do not occur at all.
+ X <- matrix(0,nrow=nobs,ncol=ncov)
+ for(i in 1:ncov)
+ X[feat[,2][feat[,1]==i],i] <- feat[,3][feat[,1]==i]
+ X
+}
+
+sim.logistic <- function(ncov,nobs,mbeta=0,sigbeta=1,zfr=.5){
+#simulate the betas (for a fixed ncov): N(m,sig)
+ beta <- rnorm(ncov,mean=mbeta,sd=sqrt(sigbeta))
+#simulate design matrix nobs x ncov (entry ~ N(0,1/sqrt(ncov)))
+ X <- matrix(rnorm(nobs*ncov),ncol=ncov)/sqrt(ncov)
+ nzero <- min(ceiling(nobs*zfr),nobs)
+ for(i in 1:ncov)X[sample(1:nobs,nzero),i] <- 0
+ p <- (1+exp(-X%*%beta))^{-1}
+ y <- rbinom(nobs,1,p)
+ #also create design matrix in sparse format
+ wk <- get.sp.des(X)
+ obsid <- wk$obsid
+ fval <- wk$fval
+ nfobs <- wk$nfobs
+ ans <- list(y,X,obsid,fval,nfobs,beta,mbeta,sigbeta)
+ names(ans) <- c("y","X","obsid","fval","nfobs","beta","mbeta","sigbeta")
+ ans
+}
+
+getlgt <- function(x,id,beta,y,X,mbeta,sigb){
+ beta[id] <- x
+ eta <- X%*%beta
+ idx <- c(1:dim(X)[1])[X[,id]!=0]
+ llik <- sum(y[idx]*eta[idx] - log1p(exp(eta[idx])))
+ lpr <- -.5*sum((x - mbeta[id])^2/sigb[id])
+ llik+lpr
+}
+
+
+arslgt <- function(obsid,featval,nfobs,y,intercept=F,betaprior=NULL,vprior=1.0,vprior.intercept=NULL,
+ nsamp=500,burnin=100,erange=10.0,verbose=T,verboseC=0,isC=T){
+
+ cat("vprior=",vprior,"\n")
+
+ y <- as.integer(y);ncov <- as.integer(length(nfobs));nobs <- as.integer(length(y))
+ pglb <- length(y[y==1])/nobs
+ if(pglb==0 | pglb==1)stop("only one label type in the data")
+
+ off=if(intercept)rep(log(pglb)-log(1.0-pglb),length(y)) else rep(0,length(y))
+
+ beta0 <- if(!is.null(betaprior))betaprior else rep(0,ncov)
+ if(length(beta0)!=ncov)stop("prior of beta not specified correctly")
+ varbeta <- rep(vprior,ncov)
+ if(!is.null(vprior.intercept))varbeta[ncov] <- vprior.intercept
+ betaout <- beta0
+
+ #parameter settings to run adaptive rejection sampler, no need to change.
+ ninit <- as.integer(3); xl <- as.double(-abs(erange));xu <- as.double(abs(erange));xi <- as.double(rep(0.0,ninit))
+ eta <- as.double(rep(0,10*nobs));
+
+ ncent <- as.integer(3);qcent <- as.double(c(5.0,50.0,95.0));xcent <- as.double(rep(0.0,ncent)); neval <- as.integer(rep(0,ncov))
+ for(i in 1:ninit){
+ xi[i] <- xl + (i + 1.0)*(xu - xl)/(ninit + 1.0)
+ if(xi[i] >= xu)xi[i]=xi[i] - .1;
+ if(xi[i] <= xl)xi[i] = xi[i] + .1;
+ }
+ XI <- as.double(rep(xi,ncov))
+ #end of parameter settings.
+ Nsamp <- nsamp+burnin
+ betamean <- rep(0,ncov); betavar <- rep(0,ncov)
+ #sampv <- NEVAL <- matrix(NA,nrow=nsamp,ncol=ncov)
+
+ if(isC){
+ if(burnin > 0){
+ ans <- .C("ARSSAMP",off,beta0,varbeta,as.integer(obsid),as.double(featval),as.integer(nfobs),y,ncov,nobs,betaout,as.double(betamean),as.double(betavar),qcent,xcent,ncent,ninit,xl,xu,XI,
+ eta,neval,as.integer(burnin),as.integer(verboseC))
+ betaout <- as.double(ans[[10]])
+ }
+ betamean <- rep(0,ncov); betavar <- rep(0,ncov)
+ ans <- .C("ARSSAMP",off,beta0,varbeta,as.integer(obsid),as.double(featval),as.integer(nfobs),y,ncov,nobs,betaout,as.double(betamean),as.double(betavar),qcent,xcent,ncent,ninit,xl,xu,XI,
+ eta,neval,as.integer(nsamp),as.integer(verboseC))
+ betamean <- ans[[11]]
+ betavar <- ans[[12]]
+ }
+ else{
+ #sampling.
+
+ for(i in 1:Nsamp){
+ ans <- .C("ARSLOGISTICSPARSE",off,beta0,varbeta,as.integer(obsid),as.double(featval),as.integer(nfobs),y,ncov,nobs,betaout,qcent,xcent,ncent,ninit,xl,xu,XI,
+ eta,neval,as.integer(verboseC))
+ sampv <- ans[[10]]
+ #sampv[i,] <- ans[[10]]
+ #NEVAL[i,] <- ans[[19]]
+ betaout <- as.double(sampv)
+ if(verbose)cat("sample=",sampv[1:min(ncov,5)],"\n")
+ if(i > burnin){
+ j <- i - burnin
+ betamean = betamean + (sampv - betamean)/j
+ betavar = betavar + (sampv*sampv - betavar)/j
+ }
+
+
+
+ XItemp <- as.double(ans[[17]])
+ if(!any(is.na(XItemp)))XI <- XItemp
+ rm(ans)
+ #if(any(is.na(apply(sampv,2,mean))))stop("numerical problem")
+ }
+ }
+ betavar = betavar - betamean*betamean
+ ret <- list(mean=betamean,var=betavar)
+ #samples=sampv[(burnin+1):nsamp,,drop=F],intercept=off[1],neval=NEVAL)
+ ret
+}
View
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2012, Yahoo! Inc. All rights reserved.
+ * Copyrights licensed under the New BSD License. See the accompanying LICENSE file for terms.
+ * Author: Deepak Agarwal
+ */
+
+
+#include "utilR.h"
+#include "arms.h"
+
+// main function
+double logexp(double x){
+ double ans;
+ if(x > 0)ans = x + log1p(exp(-x)); else ans = log1p(exp(x));
+ return ans;
+}
+
+double lgtfn(double b,void *W){
+ REG *WW;
+ int i;
+ double sum,etanew,a,v;
+
+ WW = W;
+ sum = 0.0;
+ for(i=1;i <= WW->nobs;++i){
+ etanew=WW->eta[R_VEC(i)] + WW->X[R_MAT(i,WW->id,WW->nobs)]*(b - WW->betacurr[R_VEC(WW->id)]);
+ //printf("etanew=%f\n",etanew);
+ sum += ((WW->Y[R_VEC(i)] > 0)? etanew:0) - logexp(etanew);
+ }
+ // adding log-prior
+ a=(b - WW->beta0[R_VEC(WW->id)]);
+ v= WW->varbeta[R_VEC(WW->id)];
+ sum -= .5*a*a/v;
+ return sum;
+}
+
+void debuglgt(double *b,double *off,double *beta0,double *varbeta,int *id,
+ double *X,int *Y,int *nFact,int *nObs,double *betaout,double *out){
+ REG R;
+ double *eta,lin;
+ int i,j;
+
+ eta = (double *)Calloc(*nObs,double);
+ for(i=1;i <= *nObs;++i){
+ eta[R_VEC(i)] = off[R_VEC(i)];
+ lin=0.0;
+ for(j=1;j <= *nFact;++j){
+ lin += X[R_MAT(i,j,*nObs)]*betaout[R_VEC(j)];
+ }
+ eta[R_VEC(i)] += lin;
+ }
+
+ R.eta = eta; R.Y=Y;R.X=X;R.betacurr=betaout;R.beta0=beta0;R.varbeta=varbeta; R.id = *id;R.nobs=*nObs;
+ *out = lgtfn(*b,&R);
+ Free(eta);
+ return;
+}
+//off: offset
+//beta0: priormean
+//varbeta: vector of prior variance
+//X: vectorized design matrix read columnwise.
+//Y: response
+// *nFact: number of covariates (including intercept)
+// *nObs: number of observations
+// betaout: previous beta value
+//ninit : number of points in envelope
+// xl: lower bound(envelope)
+//xu: upper bound(envelope)
+
+// to be fixed: eta is dynamically allocated, this should be done once outside the program and re-used for all calls to this function.
+void ARSLOGISTIC(double *off,double *beta0,double *varbeta,
+ double *X,int *Y,int *nFact,int *nObs,double *betaout,
+ int *ninit,double *xl,double *xu,double *eta){
+ // betaout can be used as initial values if required.
+ // first coordinate of betaout is the intercept.
+ //sample betaout
+ // for(i=1;i<=*nFact+1;++i)sample betaout[R_VEC(i)]
+ double xsamp[1],convex,lin,XL,XU;
+ int npoint,nsamp,neval,i,j,flag,count;
+ REG R;
+
+ convex=1.0;npoint=500,nsamp=1,neval=0;flag=1;count=0;
+ XL = *xl; XU= *xu;
+ //eta = (double *)Calloc(*nObs,double);
+ for(i=1;i <= *nObs;++i){
+ eta[R_VEC(i)] = off[R_VEC(i)];
+ lin=0.0;
+ for(j=1;j <= *nFact;++j){
+ lin += X[R_MAT(i,j,*nObs)]*betaout[R_VEC(j)];
+ }
+ eta[R_VEC(i)] += lin;
+ }
+ //printf("xl=%f\n",*xl);
+ R.eta = eta; R.Y=Y;R.X=X;R.betacurr=betaout;R.beta0=beta0;R.varbeta=varbeta;R.nobs=*nObs;
+ for(j=1;j <= *nFact;++j){
+ R.id=j;
+ //while(flag!=0 && count <=50){
+ //printf("in ars\n");
+ flag = arms_simple(*ninit,&XL,&XU,lgtfn,&R,0,&betaout[R_VEC(j)],xsamp);
+ if(flag > 0){printf("err=%d\t in ars",flag); exit(1);}
+ //flag=arms(xi,*ninit,&XL,&XU,lgtfn,&R,&convex,npoint,0,&betaout[R_VEC(j)],xsamp,nsamp,qcent,xcent,*ncent,&neval);
+ // printf("flag=%d\n",flag);
+ //count++;
+ //}
+ //if(count > 50){printf("error in ars\n");exit(1);
+ //}
+ //flag=1;count=0;
+ //update etas
+ for(i=1;i <= *nObs;++i)eta[R_VEC(i)] += X[R_MAT(i,j,*nObs)]*(xsamp[0]-betaout[R_VEC(j)]);
+ betaout[R_VEC(j)]=xsamp[0];
+ R.betacurr[R_VEC(j)] = xsamp[0];
+
+ }
+ //Free(eta);
+ return;
+}
+
+
+
Oops, something went wrong.

0 comments on commit 167c55f

Please sign in to comment.