Skip to content

Commit

Permalink
version0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Samantha Cook authored and gaborcsardi committed Jun 25, 2005
0 parents commit 67c7537
Show file tree
Hide file tree
Showing 7 changed files with 508 additions and 0 deletions.
18 changes: 18 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Package: BayesValidate
Version: 0.0
Date: 2005-06-25
Title: BayesValidate Package
Author: Samantha Cook <cook@stat.columbia.edu>.
Maintainer: Samantha Cook <cook@stat.columbia.edu>
Depends: R (>= 2.0.1)
Description: BayesValidate implements the software validation method
described in the paper "Validation of Software for Bayesian
Models using Posterior Quantiles" (Cook, Gelman, and Rubin,
2005). It inputs a function to perform Bayesian inference as
well as functions to generate data from the Bayesian model
being fit, and repeatedly generates and analyzes data to check
that the Bayesian inference program works properly.
License: GPL (>= 2)
Packaged: Thu Mar 30 10:48:35 2006; hornik
Repository: CRAN
Date/Publication: 2006-03-30 09:22:53
6 changes: 6 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
2def149e00bf3150ef1e49d0f152cb63 *DESCRIPTION
db59b144aedd7665062d86caf61c6b04 *NAMESPACE
1a51e441c41055d183857833a7d17804 *R/quant.R
d123b045f1c4c26a4d4ab21451797aa0 *R/validate.R
94551c15b610f3a6eddcd066f608d227 *man/quant.Rd
bc5d9ee3f1bb44420c20490e3f6da31d *man/validate.Rd
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
export(quant,validate)






7 changes: 7 additions & 0 deletions R/quant.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# quant calculates the posterior quantile of the true value of theta
quant <- function (draws) {
n<-length(draws)
rank.theta <- c(1:n)[order(draws)==1] - 1
quantile.theta <- (rank.theta +.5) / n
return(quantile.theta) }

231 changes: 231 additions & 0 deletions R/validate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
validate <- function ( generate.param, generate.param.inputs=NULL,
generate.data, generate.data.inputs=NULL, analyze.data,
analyze.data.inputs=NULL, n.rep=20,n.batch=NULL,params.batch=NULL,
print.reps=FALSE) {

#----------------------------------------------------------------------------
# Inputs
#
# generate.param : function for generating parameters from prior distribution
# should output a vector of parameters
# function should look like generate.param <- function() {...}
# or generate.param <- function(generate.param.inputs) {...}
# generate.data : function for generating data given parameters
# should take as input the output from generate.param
# should output data matrix to be analyzed
# function should look like
# generate.data <- function(theta.true) {...} or
# generate.data <- function(theta.true, generate.data.inputs)
# {...}
# analyze.data : function for generating sample from posterior distribution
# should take as input the output from generate.data and
# generate.param
# should output a matrix of parameters, each row is one
# parameter draw
# function should look like
# analyze.data <- function(data.rep,theta.true) {...} or
# analyze.data <- function(data.rep,theta.true,
# analyze.data.inputs) {...}
# !! This is the software being tested !!
#
#----------------------------------------------------------------------------
#
# Optional Inputs
#
# generate.param.inputs : inputs to the function generate.param
# generate.data.inputs : inputs to the function generate.data
# (in addition to theta.true)
# analyze.data.inputs : inputs to the function analyze.data
# (in addition to data.rep and theta.true)
# n.batch : lengths of parameter batches. Must sum to n.param
# and correspond to the order of parameters as they
# are output from analyze.data. For example, if
# there are 5 total parameters with the first two in
# one batch and the next three in another batch,
# use n.batch=c(2,3)
# params.batch : names of parameter batches, used as the y axis in
# the output plot. Must have length equal to the
# number of batches. Can consist of text (e.g.,
# params.batch=c("alpha","beta")) or an expression
# (e.g., params.batch=expression(alpha,beta)).
# Not used if n.batch not provided.
# n.rep : number of replications to be performed, default is 20
# print.reps : indcator of whether or not to print the replication
# number, default is FALSE
#
#-----------------------------------------------------------------------------
#
# Variables
#
# n.param : dimension of parameter, total number of parameters,
# length of theta
# theta.true : vector of true parameters, used to generate data and
# calculate posterior quantiles. length equals n.param
# data.rep : matrix of data generated from theta.true
# theta.draws : sample from posterior distribution
# matrix with number of columns equals n.param
# n.draws : number of rows in theta.draws, size of posterior
# sample generated
# quantiles.theta : matrix of posterior quantiles of each component of
# theta.true, number of rows equals n.rep, number of
# columns equals n.param
# p.vals : p-values based on testing for uniformity of the
# quantiles of the components of each scalar component
# of theta
# p.batch : p-values for parameter "batches", e.g., means of
# person-level parameters. These are the p-values used
# in the Bonferroni correction for multiple comparisons
# adj.min.p : the smallest of the batched p-values, with Bonferroni
# correction applied
# z.stats : z statistics corresponding to p-vals and p.batch,
# for plotting

#----------------------------------------------------------------------------
# Initial Setup

##see how long the parameter vector is
if(is.null(generate.param.inputs)) n.param <- length(generate.param()) else
n.param <-length(generate.param(generate.param.inputs))

##create variables for batched parameters
if(!is.null(n.batch)) {
num.batches<-length(n.batch)
##first, make sure batch parameter lengths match with parameter vector length
if(sum(n.batch) != n.param){
print ("Error: Lengths of parameter batches don't sum to length of parameter vector")
return()}
if(!is.null(params.batch)){
if(length(params.batch) != length(n.batch)){
print("Error: Must have same number of named parameters as batches")
return()}
}
##indices to take means over
batch.ind <- rep(0,(num.batches+1))
for(i in 1:num.batches) batch.ind[(i+1)] <- batch.ind[i] + n.batch[i]
##axis values for plot
plot.batch <- rep(1,n.batch[1])
for(i in 2:num.batches) plot.batch<-c(plot.batch,rep(i,n.batch[i]))
}

##initialize quantiles
##if parameters are in batches, make extra columns in quantile matrix for
##batch means
if(!is.null(n.batch))
quantile.theta <- matrix(0,nrow=n.rep,ncol=(n.param+num.batches)) else
quantile.theta <- matrix(0,nrow=n.rep,ncol=n.param)

#---------------------------------------------------------------------------
# Validation Loop

for(reps in 1:n.rep){

##First generate parameters
##theta.true is vector of true parameters, with length n.param
if(is.null(generate.param.inputs)) theta.true <- generate.param() else
theta.true<-generate.param(generate.param.inputs)

##Next generate data
if(is.null(generate.data.inputs))
data.rep <- generate.data(theta.true) else
data.rep <- generate.data(theta.true, generate.data.inputs)

##Now analyze data
if(is.null(analyze.data.inputs))
theta.draws <- analyze.data(data.rep,theta.true) else
theta.draws <- analyze.data(data.rep,theta.true, analyze.data.inputs)

if ( is.matrix(theta.draws) == FALSE )
theta.draws<-as.matrix(theta.draws)

##Make sure that theta.true and theta.draws have same number
##of parameters
if( length(theta.true) != ncol(theta.draws) ){
print("Error: Generate.param and analyze.data must output same number of parameters")
return()}

##If parameters are batched, add means of batched parameters to
##output matrix
if(!is.null(n.batch)){
for(i in 1:num.batches) {
if(n.batch[i]>1){
theta.draws <- cbind(theta.draws,
apply(as.matrix(theta.draws[,(batch.ind[i]+1):batch.ind[(i+1)]]),
1,mean))
theta.true <- c(theta.true,
mean(theta.true[(batch.ind[i]+1):batch.ind[(i+1)]]))
} else {
theta.draws <- cbind(theta.draws,theta.draws[,(batch.ind[i]+1)])
theta.true <- c(theta.true, theta.true[(batch.ind[i]+1)])
}
}
}

##update matrix of posterior quantiles
theta.draws <- rbind(theta.true, theta.draws)
quantile.theta[reps, ] <- apply( theta.draws, 2, quant )

if(print.reps==TRUE) print(reps)
}


#-----------------------------------------------------------------------------
# Analyze simulation output

##calculate z.theta statistics and p-values
quantile.trans <- (apply(quantile.theta, 2, qnorm))^2
q.trans <- apply(quantile.trans,2,sum)
p.vals <- pchisq(q.trans,df=n.rep,lower.tail=FALSE)
z.stats <- abs( qnorm(p.vals) )

lower.lim<-min( min(z.stats-1), -3.5)
upper.lim<-max( max(z.stats+1), 3.5)

##Bonferonni correction
if (is.null(n.batch)) {
adj.min.p <- n.param*min(p.vals)
}
else {
z.batch <- z.stats[(n.param + 1):length(p.vals)]
p.batch <- p.vals[(n.param + 1):length(p.vals)]
adj.min.p <- num.batches*min(p.batch)
}

##Print output
print(paste("Smallest Bonferonni-adjusted p-value: ", round(adj.min.p, 3)))

##plot
if(is.null(n.batch)){
plot(z.stats, rep(1,n.param), xlim=c(0,upper.lim),xlab="",
ylab="",main=expression("Absolute "*z[theta]*" Statistics"),
axes=F)
axis(1,line=.1)} else {
##first plot parameters that are NOT batch means
rows.one<-c(1:num.batches)[n.batch==1]
plot(z.stats[1:n.param][plot.batch%in%rows.one==FALSE],
plot.batch[plot.batch%in%rows.one==FALSE],
xlim=c(0,upper.lim),ylim=c(1,num.batches),xlab="",
ylab="",main=expression("Absolute "*z[theta]*" Statistics"),
axes=F,pch=176)
##now add batch means
points(z.batch,c(1:num.batches),pch=20)
axis(1,line=.1)
if(!is.null(params.batch))
axis(2,at=c(1:max(plot.batch)),tick=FALSE,labels=params.batch,
las=1,line=-1)}
abline(v=0)

#----------------------------------------------------------------------------

##return z statistics and p-value
if(is.null(n.batch)){
return(list(p.vals = p.vals, adj.min.p=adj.min.p))} else {

if(length(z.batch)==n.param)
return(list(p.batch = p.batch, adj.min.p=adj.min.p)) else
return(list(p.vals = p.vals[1:n.param], p.batch = p.batch,
adj.min.p=adj.min.p))
}

}


40 changes: 40 additions & 0 deletions man/quant.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
\name{quant}
\alias{quant}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{ Calculate empirical quantile of the first entry in a vector }
\description{
\code{quant} inputs a vector and returns the empirical quantile of the first
argument in the vector with respect to all entries in the vector. Used as
part of the function validate for Bayesian software validation, this function
is used to calculate the empirical quantile of a "true" parameter value with
respect to a collection of posterior draws of that parameter.
}
\usage{
quant(draws)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{draws}{ Vector of parameter draws, with entry of interest, i.e., the
value whose quantile is being calculated, at the beginning. }
}
\details{
Calculates the rank of the first entry of the vector with respect to all other entries,
subtracts .5, and divides by the length of the vector.
}
\value{
The empirical quantile of the first entry of the vector, a
scalar between 0 and 1.
}
%\references{ ~put references to the literature/web site here ~ }
\author{Samantha Cook \email{cook@stat.columbia.edu} }

% ~Make other sections like Warning with \section{Warning }{....} ~

%\seealso{ ~~objects to See Also as \code{\link{~~fun~~}}, ~~~ }
\examples{
set.seed(314)
x<-rnorm(1000)
quant(x)

}
\keyword{distribution}

0 comments on commit 67c7537

Please sign in to comment.