forked from stan-dev/bayesplot
-
Notifications
You must be signed in to change notification settings - Fork 1
/
PPC-errors.Rd
186 lines (161 loc) · 6.44 KB
/
PPC-errors.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ppc-errors.R
\name{PPC-errors}
\alias{PPC-errors}
\alias{ppc_error_hist}
\alias{ppc_error_hist_grouped}
\alias{ppc_error_scatter}
\alias{ppc_error_scatter_avg}
\alias{ppc_error_scatter_avg_vs_x}
\alias{ppc_error_binned}
\title{PPC errors}
\usage{
ppc_error_hist(y, yrep, ..., binwidth = NULL, breaks = NULL, freq = TRUE)
ppc_error_hist_grouped(
y,
yrep,
group,
...,
binwidth = NULL,
breaks = NULL,
freq = TRUE
)
ppc_error_scatter(y, yrep, ..., size = 2.5, alpha = 0.8)
ppc_error_scatter_avg(y, yrep, ..., size = 2.5, alpha = 0.8)
ppc_error_scatter_avg_vs_x(y, yrep, x, ..., size = 2.5, alpha = 0.8)
ppc_error_binned(y, yrep, ..., bins = NULL, size = 1, alpha = 0.25)
}
\arguments{
\item{y}{A vector of observations. See \strong{Details}.}
\item{yrep}{An \eqn{S} by \eqn{N} matrix of draws from the posterior
predictive distribution, where \eqn{S} is the size of the posterior sample
(or subset of the posterior sample used to generate \code{yrep}) and \eqn{N} is
the number of observations (the length of \code{y}). The columns of \code{yrep}
should be in the same order as the data points in \code{y} for the plots to make
sense. See \strong{Details} for additional instructions.}
\item{...}{Currently unused.}
\item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override
the default binwidth.}
\item{breaks}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} as an
alternative to \code{binwidth}.}
\item{freq}{For histograms, \code{freq=TRUE} (the default) puts count on the
y-axis. Setting \code{freq=FALSE} puts density on the y-axis. (For many
plots the y-axis text is off by default. To view the count or density
labels on the y-axis see the \code{\link[=yaxis_text]{yaxis_text()}} convenience
function.)}
\item{group}{A grouping variable (a vector or factor) the same length as
\code{y}. Each value in \code{group} is interpreted as the group level
pertaining to the corresponding value of \code{y}.}
\item{size, alpha}{For scatterplots, arguments passed to
\code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} to control the appearance of the points. For the
binned error plot, arguments controlling the size of the outline and
opacity of the shaded region indicating the 2-SE bounds.}
\item{x}{A numeric vector the same length as \code{y} to use as the x-axis
variable.}
\item{bins}{For \code{ppc_error_binned()}, the number of bins to use (approximately).}
}
\value{
A ggplot object that can be further customized using the \strong{ggplot2} package.
}
\description{
Various plots of predictive errors \code{y - yrep}. See the
\strong{Details} and \strong{Plot descriptions} sections, below.
}
\details{
All of these functions (aside from the \verb{*_scatter_avg} functions)
compute and plot predictive errors for each row of the matrix \code{yrep}, so
it is usually a good idea for \code{yrep} to contain only a small number of
draws (rows). See \strong{Examples}, below.
For binomial and Bernoulli data the \code{ppc_error_binned()} function can be used
to generate binned error plots. Bernoulli data can be input as a vector of 0s
and 1s, whereas for binomial data \code{y} and \code{yrep} should contain "success"
proportions (not counts). See the \strong{Examples} section, below.
}
\section{Plot descriptions}{
\describe{
\item{\code{ppc_error_hist()}}{
A separate histogram is plotted for the predictive errors computed from
\code{y} and each dataset (row) in \code{yrep}. For this plot \code{yrep}
should have only a small number of rows.
}
\item{\code{ppc_error_hist_grouped()}}{
Like \code{ppc_error_hist()}, except errors are computed within levels of a
grouping variable. The number of histograms is therefore equal to the
product of the number of rows in \code{yrep} and the number of groups
(unique values of \code{group}).
}
\item{\code{ppc_error_scatter()}}{
A separate scatterplot is displayed for \code{y} vs. the predictive errors
computed from \code{y} and each dataset (row) in \code{yrep}. For this
plot \code{yrep} should have only a small number of rows.
}
\item{\code{ppc_error_scatter_avg()}}{
A single scatterplot of \code{y} vs. the average of the errors computed
from \code{y} and each dataset (row) in \code{yrep}. For each individual
data point \code{y[n]} the average error is the average of the
errors for \code{y[n]} computed over the draws from the posterior
predictive distribution.
}
\item{\code{ppc_error_scatter_avg_vs_x()}}{
Same as \code{ppc_error_scatter_avg()}, except the average is plotted on the
\eqn{y}-axis and a predictor variable \code{x} is plotted on the
\eqn{x}-axis.
}
\item{\code{ppc_error_binned()}}{
Intended for use with binomial data. A separate binned error plot (similar
to \code{arm::binnedplot()}) is generated for each dataset (row) in \code{yrep}. For
this plot \code{y} and \code{yrep} should contain proportions rather than counts,
and \code{yrep} should have only a small number of rows.
}
}
}
\examples{
y <- example_y_data()
yrep <- example_yrep_draws()
ppc_error_hist(y, yrep[1:3, ])
# errors within groups
group <- example_group_data()
(p1 <- ppc_error_hist_grouped(y, yrep[1:3, ], group))
p1 + yaxis_text() # defaults to showing counts on y-axis
\donttest{
table(group) # more obs in GroupB, can set freq=FALSE to show density on y-axis
(p2 <- ppc_error_hist_grouped(y, yrep[1:3, ], group, freq = FALSE))
p2 + yaxis_text()
}
# scatterplots
ppc_error_scatter(y, yrep[10:14, ])
ppc_error_scatter_avg(y, yrep)
x <- example_x_data()
ppc_error_scatter_avg_vs_x(y, yrep, x)
# ppc_error_binned with binomial model from rstanarm
\dontrun{
library(rstanarm)
example("example_model", package = "rstanarm")
formula(example_model)
# get observed proportion of "successes"
y <- example_model$y # matrix of "success" and "failure" counts
trials <- rowSums(y)
y_prop <- y[, 1] / trials # proportions
# get predicted success proportions
yrep <- posterior_predict(example_model)
yrep_prop <- sweep(yrep, 2, trials, "/")
ppc_error_binned(y_prop, yrep_prop[1:6, ])
}
}
\references{
Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari,
A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC
Press, London, third edition. (Ch. 6)
}
\seealso{
Other PPCs:
\code{\link{PPC-censoring}},
\code{\link{PPC-discrete}},
\code{\link{PPC-distributions}},
\code{\link{PPC-intervals}},
\code{\link{PPC-loo}},
\code{\link{PPC-overview}},
\code{\link{PPC-scatterplots}},
\code{\link{PPC-test-statistics}}
}
\concept{PPCs}