version0.0

cran · Jun 25, 2005 · 67c7537 · 67c7537
commit 67c7537
Show file tree

Hide file tree

Showing 7 changed files with 508 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,18 @@
+Package: BayesValidate
+Version: 0.0
+Date: 2005-06-25
+Title: BayesValidate Package
+Author: Samantha Cook <cook@stat.columbia.edu>.
+Maintainer: Samantha Cook <cook@stat.columbia.edu>
+Depends: R (>= 2.0.1)
+Description: BayesValidate implements the software validation method
+        described in the paper "Validation of Software for Bayesian
+        Models using Posterior Quantiles" (Cook, Gelman, and Rubin,
+        2005).  It inputs a function to perform Bayesian inference as
+        well as functions to generate data from the Bayesian model
+        being fit, and repeatedly generates and analyzes data to check
+        that the Bayesian inference program works properly.
+License: GPL (>= 2)
+Packaged: Thu Mar 30 10:48:35 2006; hornik
+Repository: CRAN
+Date/Publication: 2006-03-30 09:22:53
diff --git a/MD5 b/MD5
@@ -0,0 +1,6 @@
+2def149e00bf3150ef1e49d0f152cb63 *DESCRIPTION
+db59b144aedd7665062d86caf61c6b04 *NAMESPACE
+1a51e441c41055d183857833a7d17804 *R/quant.R
+d123b045f1c4c26a4d4ab21451797aa0 *R/validate.R
+94551c15b610f3a6eddcd066f608d227 *man/quant.Rd
+bc5d9ee3f1bb44420c20490e3f6da31d *man/validate.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,7 @@
+export(quant,validate)
+
+
+
+
+
+
diff --git a/R/quant.R b/R/quant.R
@@ -0,0 +1,7 @@
+# quant calculates the posterior quantile of the true value of theta
+quant <- function (draws) {
+	n<-length(draws)
+	rank.theta <- c(1:n)[order(draws)==1] - 1
+	quantile.theta <- (rank.theta +.5) / n
+	return(quantile.theta) }
+
diff --git a/R/validate.R b/R/validate.R
@@ -0,0 +1,231 @@
+validate <- function ( generate.param, generate.param.inputs=NULL, 
+	generate.data, generate.data.inputs=NULL, analyze.data, 
+	analyze.data.inputs=NULL, n.rep=20,n.batch=NULL,params.batch=NULL, 
+	print.reps=FALSE) {
+
+#----------------------------------------------------------------------------
+#        Inputs
+#
+# generate.param : function for generating parameters from prior distribution
+#                  should output a vector of parameters
+#                  function should look like generate.param <- function() {...}
+#                  or generate.param <- function(generate.param.inputs) {...}
+# generate.data  : function for generating data given parameters
+#                  should take as input the output from generate.param
+#                  should output data matrix to be analyzed
+#                  function should look like 
+#		   generate.data <- function(theta.true) {...} or
+#                  generate.data <- function(theta.true, generate.data.inputs)
+#		   {...}
+# analyze.data   : function for generating sample from posterior distribution 
+#                  should take as input the output from generate.data and 
+#		   generate.param 
+#                  should output a matrix of parameters, each row is one 
+#		   parameter draw
+#                  function should look like 
+#		   analyze.data <- function(data.rep,theta.true) {...} or 
+#                  analyze.data <- function(data.rep,theta.true, 
+#		   analyze.data.inputs) {...}
+#                !! This is the software being tested !!
+#
+#----------------------------------------------------------------------------
+#
+#        Optional Inputs
+#
+# generate.param.inputs : inputs to the function generate.param
+# generate.data.inputs  : inputs to the function generate.data 
+#			  (in addition to theta.true)
+# analyze.data.inputs   : inputs to the function analyze.data 
+#			  (in addition to data.rep and theta.true)
+# n.batch               : lengths of parameter batches.  Must sum to n.param 
+#			  and correspond to the order of parameters as they 
+#			  are output from analyze.data.  For example, if 
+#			  there are 5 total parameters with the first two in 
+#			  one batch and the next three in another batch, 
+#			  use n.batch=c(2,3)
+# params.batch		: names of parameter batches, used as the y axis in 
+#			  the output plot.  Must have length equal to the 
+#			  number of batches.  Can consist of text (e.g., 
+#			  params.batch=c("alpha","beta")) or an expression 
+#			  (e.g., params.batch=expression(alpha,beta)).  
+#			  Not used if n.batch not provided.
+# n.rep                 : number of replications to be performed, default is 20
+# print.reps            : indcator of whether or not to print the replication 
+#			  number, default is FALSE
+#
+#-----------------------------------------------------------------------------
+#
+#        Variables
+#
+# n.param           : dimension of parameter, total number of parameters, 
+#		      length of theta
+# theta.true        : vector of true parameters, used to generate data and 
+#		      calculate posterior quantiles.  length equals n.param 
+# data.rep          : matrix of data generated from theta.true
+# theta.draws       : sample from posterior distribution
+#                     matrix with number of columns equals n.param
+# n.draws           : number of rows in theta.draws, size of posterior 
+#		      sample generated
+# quantiles.theta   : matrix of posterior quantiles of each component of 
+#		      theta.true, number of rows equals n.rep, number of 
+#		      columns equals n.param
+# p.vals	    : p-values based on testing for uniformity of the 
+#		      quantiles of the components of each scalar component 
+#		      of theta
+# p.batch	    : p-values for parameter "batches", e.g., means of 
+#	              person-level parameters.  These are the p-values used 
+#                     in the Bonferroni correction for multiple comparisons
+# adj.min.p         : the smallest of the batched p-values, with Bonferroni
+#                     correction applied
+# z.stats	    : z statistics corresponding to p-vals and p.batch, 
+#		      for plotting
+
+#----------------------------------------------------------------------------
+#            Initial Setup
+
+##see how long the parameter vector is
+if(is.null(generate.param.inputs)) n.param <- length(generate.param()) else 
+	n.param <-length(generate.param(generate.param.inputs))
+
+##create variables for batched parameters
+if(!is.null(n.batch)) {
+	num.batches<-length(n.batch)
+  ##first, make sure batch parameter lengths match with parameter vector length
+	if(sum(n.batch) != n.param){ 
+	print ("Error:  Lengths of parameter batches don't sum to length of parameter vector")
+	return()}
+	if(!is.null(params.batch)){
+	if(length(params.batch) != length(n.batch)){
+		print("Error:  Must have same number of named parameters as batches")
+		return()}
+	}
+  ##indices to take means over
+	batch.ind <- rep(0,(num.batches+1))
+	for(i in 1:num.batches) batch.ind[(i+1)] <- batch.ind[i] + n.batch[i]
+  ##axis values for plot
+	plot.batch <- rep(1,n.batch[1])
+	for(i in 2:num.batches) plot.batch<-c(plot.batch,rep(i,n.batch[i]))
+	}
+
+##initialize quantiles
+##if parameters are in batches, make extra columns in quantile matrix for 
+##batch means
+if(!is.null(n.batch)) 
+	quantile.theta <- matrix(0,nrow=n.rep,ncol=(n.param+num.batches)) else
+	quantile.theta <- matrix(0,nrow=n.rep,ncol=n.param)
+
+#---------------------------------------------------------------------------
+#            Validation Loop
+
+for(reps in 1:n.rep){
+
+	##First generate parameters
+	##theta.true is vector of true parameters, with length n.param
+	if(is.null(generate.param.inputs)) theta.true <- generate.param() else 
+		theta.true<-generate.param(generate.param.inputs)
+
+	##Next generate data
+        if(is.null(generate.data.inputs)) 
+		data.rep <- generate.data(theta.true) else
+		data.rep <- generate.data(theta.true, generate.data.inputs)
+
+	##Now analyze data
+	if(is.null(analyze.data.inputs)) 
+		theta.draws <- analyze.data(data.rep,theta.true) else
+		theta.draws <- analyze.data(data.rep,theta.true, analyze.data.inputs)
+
+	if ( is.matrix(theta.draws) == FALSE )  
+		theta.draws<-as.matrix(theta.draws)
+
+	##Make sure that theta.true and theta.draws have same number 
+	##of parameters
+	if( length(theta.true) != ncol(theta.draws) ){
+	   print("Error: Generate.param and analyze.data must output same number of parameters")
+	   return()}
+
+	##If parameters are batched, add means of batched parameters to 
+	##output matrix
+	if(!is.null(n.batch)){
+	  for(i in 1:num.batches) {
+	    if(n.batch[i]>1){
+	    theta.draws <- cbind(theta.draws,
+	      apply(as.matrix(theta.draws[,(batch.ind[i]+1):batch.ind[(i+1)]]),
+	      1,mean))
+	    theta.true <- c(theta.true,
+	      mean(theta.true[(batch.ind[i]+1):batch.ind[(i+1)]]))
+	    } else {
+	    theta.draws <- cbind(theta.draws,theta.draws[,(batch.ind[i]+1)])
+	    theta.true <- c(theta.true, theta.true[(batch.ind[i]+1)])
+	    }
+	  }
+	}
+
+	##update matrix of posterior quantiles
+	theta.draws <- rbind(theta.true, theta.draws)
+	quantile.theta[reps, ] <- apply( theta.draws, 2, quant )
+
+	if(print.reps==TRUE) print(reps)
+}
+
+
+#-----------------------------------------------------------------------------
+#            Analyze simulation output
+
+##calculate z.theta statistics and p-values
+quantile.trans <- (apply(quantile.theta, 2, qnorm))^2
+q.trans <- apply(quantile.trans,2,sum)
+p.vals <- pchisq(q.trans,df=n.rep,lower.tail=FALSE)
+z.stats <- abs( qnorm(p.vals) )
+
+lower.lim<-min( min(z.stats-1), -3.5)
+upper.lim<-max( max(z.stats+1), 3.5)
+
+##Bonferonni correction
+if (is.null(n.batch)) {
+    adj.min.p <- n.param*min(p.vals)
+}
+else {
+    z.batch <- z.stats[(n.param + 1):length(p.vals)]
+    p.batch <- p.vals[(n.param + 1):length(p.vals)]
+    adj.min.p <- num.batches*min(p.batch)
+}
+
+##Print output
+print(paste("Smallest Bonferonni-adjusted p-value: ", round(adj.min.p, 3)))
+
+##plot
+if(is.null(n.batch)){
+	plot(z.stats, rep(1,n.param), xlim=c(0,upper.lim),xlab="",
+		ylab="",main=expression("Absolute "*z[theta]*" Statistics"),
+		axes=F)
+	axis(1,line=.1)} else {
+	##first plot parameters that are NOT batch means
+	rows.one<-c(1:num.batches)[n.batch==1]
+	plot(z.stats[1:n.param][plot.batch%in%rows.one==FALSE],
+		plot.batch[plot.batch%in%rows.one==FALSE],
+		xlim=c(0,upper.lim),ylim=c(1,num.batches),xlab="",
+		ylab="",main=expression("Absolute "*z[theta]*" Statistics"),
+		axes=F,pch=176)
+	##now add batch means
+	points(z.batch,c(1:num.batches),pch=20)
+	axis(1,line=.1)
+	if(!is.null(params.batch)) 	
+		axis(2,at=c(1:max(plot.batch)),tick=FALSE,labels=params.batch,
+		las=1,line=-1)}
+abline(v=0)
+
+#----------------------------------------------------------------------------
+
+##return z statistics and p-value
+if(is.null(n.batch)){
+	return(list(p.vals = p.vals, adj.min.p=adj.min.p))} else {
+
+	if(length(z.batch)==n.param)
+	return(list(p.batch = p.batch, adj.min.p=adj.min.p)) else 
+	return(list(p.vals = p.vals[1:n.param], p.batch = p.batch, 
+            adj.min.p=adj.min.p))
+	}
+
+}
+
+
diff --git a/man/quant.Rd b/man/quant.Rd
@@ -0,0 +1,40 @@
+\name{quant}
+\alias{quant}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate empirical quantile of the first entry in a vector }
+\description{
+  \code{quant} inputs a vector and returns the empirical quantile of the first 
+	argument in the vector with respect to all entries in the vector.  Used as 
+	part of the function validate for Bayesian software validation, this function 
+	is used to calculate the empirical quantile of a "true" parameter value with 
+	respect to a collection of posterior draws of that parameter.
+}
+\usage{
+quant(draws)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{draws}{ Vector of parameter draws, with entry of interest, i.e., the 
+	value whose quantile is being calculated, at the beginning. }
+}
+\details{
+  Calculates the rank of the first entry of the vector with respect to all other entries, 
+	subtracts .5, and divides by the length of the vector.  
+}
+\value{
+  The empirical quantile of the first entry of the vector, a
+	scalar between 0 and 1.
+}
+%\references{ ~put references to the literature/web site here ~ }
+\author{Samantha Cook \email{cook@stat.columbia.edu} }
+
+% ~Make other sections like Warning with \section{Warning }{....} ~
+
+%\seealso{ ~~objects to See Also as \code{\link{~~fun~~}}, ~~~ }
+\examples{
+	set.seed(314)
+	x<-rnorm(1000)
+	quant(x)
+
+}
+\keyword{distribution}