Skip to content

Commit

Permalink
added prior_plot function fix #6, updated documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
fawda123 committed Oct 7, 2022
1 parent da6b5c2 commit 8c25c6c
Show file tree
Hide file tree
Showing 12 changed files with 176 additions and 48 deletions.
15 changes: 8 additions & 7 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: EBASE
Title: Estuarine Bayesian Single-Station Estimation Method for Ecosystem Metabolism
Version: 0.0.0.9003
Date: 2022-09-13
Version: 0.0.0.9004
Date: 2022-10-07
Authors@R: c(
person(given = "Marcus",
family = "Beck",
Expand Down Expand Up @@ -50,13 +50,14 @@ Imports:
R2jags (>= 0.6.1),
rjags (>= 4.10),
tidyr,
truncnorm,
zoo
Suggests:
testthat (>= 3.0.0),
doParallel,
knitr,
rmarkdown,
covr
testthat (>= 3.0.0),
doParallel,
knitr,
rmarkdown,
covr
Remotes:
jmarriola/fwoxy
License: CC0
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(ebase_prep)
export(fit_plot)
export(interp_plot)
export(metab_update)
export(prior_plot)
import(R2jags)
import(foreach)
import(here)
Expand Down
10 changes: 5 additions & 5 deletions R/credible_plot.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#' Plot credible intervals for a, b, and Rt_vol
#' Plot credible intervals for a, Rt_vol, and b
#'
#' @param res output data frame from \code{\link{ebase}}
#' @param params character vector indicating which parameters to plot, one to any of \code{a}, \code{b}, or \code{Rt_vol} (default all)
#' @param params character vector indicating which parameters to plot, one to any of \code{a}, \code{Rt_vol}, or \code{b}, (default all)
#'
#' @return A \code{\link[ggplot2]{ggplot}} object
#' @export
#'
#' @details This function plots 95% credible intervals (2.5th to 97.5th percentiles) for \code{a}, \code{b}, and/or \code{Rt_vol} using the output from \code{\link{ebase}}. Results in the plot are grouped by the \code{ndays} argument that was used in \code{\link{ebase}}.
#' @details This function plots 95% credible intervals (2.5th to 97.5th percentiles, approximate posterior distributions) for \code{a}, \code{Rt_vol} and/or \code{b} using the output from \code{\link{ebase}}. Results in the plot are grouped by the \code{ndays} argument that was used in \code{\link{ebase}}.
#'
#' @examples
#'
Expand All @@ -31,9 +31,9 @@
#'
#' # plot credible intervals
#' credible_plot(res)
credible_plot <- function(res, params = c('a', 'b', 'Rt_vol')){
credible_plot <- function(res, params = c('a', 'Rt_vol', 'b')){

toplo <- credible_prep(res, param = params, labels = TRUE)
toplo <- credible_prep(res, params = params, labels = TRUE)

p <- ggplot2::ggplot(toplo, ggplot2::aes(x = Date, y = mean, group = grp)) +
ggplot2::geom_point() +
Expand Down
26 changes: 13 additions & 13 deletions R/credible_prep.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#' Get credible intervals for a, b, and Rt_vol
#' Get credible intervals for a, Rt_vol, b
#'
#' @param res output data frame from \code{\link{ebase}}
#' @param params character vector indicating which parameters to plot, one to any of \code{a}, \code{b}, or \code{Rt_vol} (default all)
#' @param params character vector indicating which parameters to plot, one to any of \code{a}, \code{Rt_vol}, or \code{b} (default all)
#' @param labels logical indicating of parameter labels are output as an expression for parsing in plot facets, default \code{FALSE}
#'
#' @return A data frame
#' @export
#'
#' @details This function gets 95% credible intervals (2.5th to 97.5th percentiles) for \code{a}, \code{b}, and/or \code{Rt_vol} using the output from \code{\link{ebase}}. The function is used in \code{\link{credible_plot}}, but is provided as a separate function for convenience.
#' @details This function gets 95% credible intervals (2.5th to 97.5th percentiles, approximate posterior distributions) for \code{a}, \code{Rt_vol}, and/or \code{b} using the output from \code{\link{ebase}}. The function is used in \code{\link{credible_plot}}, but is provided as a separate function for convenience.
#'
#' @examples
#'
Expand All @@ -32,24 +32,24 @@
#'
#' # get credible intervals
#' credible_prep(res)
credible_prep <- function(res, params = c('a', 'b', 'Rt_vol'), labels = FALSE){
credible_prep <- function(res, params = c('a', 'Rt_vol', 'b'), labels = FALSE){

chk <- params %in% c('a', 'b', 'Rt_vol')
chk <- params %in% c('a', 'Rt_vol', 'b')
if(any(!chk))
stop('param must be a, b, and/or Rt_vol')
stop('param must be a, Rt_vol and/or b')

paramslo <- paste0(params, 'lo')
paramshi <- paste0(params, 'hi')

labs <- c('a', 'b', 'Rt_vol')
labs <- c('a', 'Rt_vol', 'b')
if(labels)
labs <- c('a~(mmol~m^{-3}~d^{-1})(W~m^{-2})',
'b~(cm~hr^{-1})(m^{2}~s^{-2})',
'Rt[vol]~(mmol~m^{-3}~d^{-1})'
'Rt[vol]~(mmol~m^{-3}~d^{-1})',
'b~(cm~hr^{-1})(m^{2}~s^{-2})'
)

out <- res %>%
dplyr::select(any_of(c('Date', 'grp', params, paramslo, paramshi))) %>%
dplyr::select(dplyr::any_of(c('Date', 'grp', params, paramslo, paramshi))) %>%
unique() %>%
tidyr::pivot_longer(-c('Date', 'grp'), names_to = 'var', values_to = 'val') %>%
dplyr::group_by(grp) %>%
Expand All @@ -59,15 +59,15 @@ credible_prep <- function(res, params = c('a', 'b', 'Rt_vol'), labels = FALSE){
T ~ val
)
) %>%
mutate(
var = case_when(
dplyr::mutate(
var = dplyr::case_when(
!grepl('lo|hi', var) ~ paste0(var, 'mean'),
T ~ var
),
est = gsub(paste(paste0('^', params), collapse = '|'), '', var),
var = gsub('lo$|hi$|mean$', '', var),
var = factor(var,
levels = c('a', 'b', 'Rt_vol'),
levels = c('a', 'Rt_vol', 'b'),
labels = labs
)
) %>%
Expand Down
4 changes: 3 additions & 1 deletion R/globalVariables.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ globalVariables(c('%>%', '.', 'DO_obs', 'DO_sat', 'Date', 'DateTimeStamp', 'Sal'
'ats', 'bts', 'd', 'dDO', 'erts', 'exdat', 'gets', 'gppts', 'recompile',
'update', 'H', 'PAR', 'U10', 'a', 'b', 'jags.parallel', 'num.measurements',
'pow', 'r', 'sc', 'nstepd', 'zz', 'D', 'Pg_vol', 'Rt_vol', 'matches', 'name',
'value', 'xval', 'grp', 'i', 'interval', 'isinterp', 'yval', 'DO_mod', 'rsq'))
'value', 'xval', 'grp', 'i', 'interval', 'isinterp', 'yval', 'DO_mod', 'rsq',
'atshi', 'atslo', 'btshi', 'btslo', 'ertshi', 'ertslo', 'getshi', 'getslo',
'gpptshi', 'gpptslo', 'hi', 'lo', 'val', 'var', 'sd'))

#' @importFrom stats lm na.omit runif
NULL
62 changes: 62 additions & 0 deletions R/prior_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Plot prior distributions for a, Rt_vol, and b
#'
#' @param aprior numeric vector of length two indicating the mean and standard deviation for the prior distribution of the \emph{a} parameter, see details
#' @param rprior numeric vector of length two indicating the mean and standard deviation for the prior distribution of the \emph{r} parameter, see details
#' @param bprior numeric vector of length two indicating the mean and standard deviation for the prior distribution of the \emph{b} parameter, see details
#' @param n numeric indicating number of random samples to draw from prior distributions
#'
#' @return A \code{\link[ggplot2]{ggplot}} object
#' @export
#'
#' @details This function produces a plot of the prior distributions that are used in \code{\link{ebase}} for the \emph{a}, \emph{r}, and \emph{b} parameters for the optimization equation for estimating metabolism. The \code{\link{ebase}} function uses the same default values for the arguments for \code{aprior}, \code{rprior}, and \code{bprior} as required for this function. If the default values are changed for \code{\link{ebase}}, this function can be used to assess how changing characteristics of the prior distributions could influence the resulting parameter estimates and their posterior distributions (e.g., as shown with \code{\link{credible_plot}}.
#'
#' All parameters follow a normal Gaussian distribution for the prios with the means and standard deviations defined by the arguments. All distributions are truncated to include only values greater than zero as required by the core metabolism equation. Truncated normal distributions are obtained using the \code{\link[truncnorm]{rtruncnorm}} function with the number of random samples defined by the \code{n} argument.
#'
#' The x-axis label for the \emph{r} parameter is shown using the volumetric notation for respiration for consistency with \code{\link{credible_plot}}.
#'
#' @examples
#' # default plot
#' prior_plot()
#'
#' # changing the mean and standard deviation for the b parameter
#' prior_plot(bprior = c(0.4, 1))
prior_plot <- function(aprior = c(0.2, 0.96), rprior = c(20, 10), bprior = c(0.251, 0.04), n = 1000){

labs <- c('a~(mmol~m^{-3}~d^{-1})(W~m^{-2})',
'Rt[vol]~(mmol~m^{-3}~d^{-1})',
'b~(cm~hr^{-1})(m^{2}~s^{-2})'
)

aprior <- data.frame(var = 'aprior', mean = aprior[1], sd = aprior[2])
rprior <- data.frame(var = 'rprior', mean = rprior[1], sd = rprior[2])
bprior <- data.frame(var = 'bprior', mean = bprior[1], sd = bprior[2])

toplo <- rbind(aprior, rprior, bprior) %>%
dplyr::group_by(var) %>%
dplyr::mutate(
val = list(truncnorm::rtruncnorm(n, a = 0, mean = mean, sd = sd)),
var = factor(var,
levels = c('aprior', 'rprior', 'bprior'),
labels = labs
)
) %>%
dplyr::ungroup() %>%
dplyr::select(var, val) %>%
tidyr::unnest('val')

p <- ggplot2::ggplot(toplo, ggplot2::aes(x = val)) +
ggplot2::geom_density(fill = 'grey', adjust = 2) +
ggplot2::facet_wrap(~var, scales = 'free', labeller = ggplot2::label_parsed, strip.position = 'bottom') +
ggplot2::theme_minimal() +
ggplot2::theme(
strip.placement = 'outside',
strip.text = ggplot2::element_text(size = ggplot2::rel(1))
) +
ggplot2::labs(
y = 'Density',
x = NULL
)

return(p)

}
3 changes: 0 additions & 3 deletions log.txt

This file was deleted.

10 changes: 5 additions & 5 deletions man/credible_plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/credible_prep.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions man/prior_plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions tests/testthat/test-prior_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
test_that("Check output for prior_plot", {
result <- prior_plot()
expect_s3_class(result, 'ggplot')
})
37 changes: 28 additions & 9 deletions vignettes/ebase-overview.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,25 @@ A scatterplot showing modeled versus observed dissolved oxygen can also be retur
fit_plot(res, bygroup = TRUE, scatter = TRUE)
```

95% credible intervals for `a`, `b`, and `Rt_vol` are also returned with the output from `ebase()` in the corresponding columns `alo`, `ahi`, `blo`, `bhi`, `Rt_vollo`, and `Rt_volhi`, for the 2.5th and 97.5th percentile estimates for each parameter, respectively. These values indicate the interval within which there is a 95% probability that the true parameter is in this range. Note that all values for these parameters are repeated across rows, although only one estimate for each is returned based on the number of days defined by `ndays`.
The prior distributions for the $a$, $r$, and $b$ parameters are defined in the model file included with the package as normal Gaussian distributions with mean and standard deviations provided by the `aprior`, `rprior`, and `bprior` arguments in `ebase()`. The location of the model file can be viewed as follows.

```{r, eval = F}
system.file('ebase_model.txt', package = 'EBASE')
```

The default values for the priors were chosen based on the ability of EBASE to reproduce known parameters from a forward metabolism model. An additional constraint is that all the prior distributions are truncated to be positive values as required by the core metabolism equation above. Units for each parameter are (mmol/m3/d)(W/m2) for $a$, mmol/m3/d for $r$, and (cm/hr)/(m2/s2) for $b$.

The prior distributions can be viewed with the `prior_plot()` function. No changes are needed to the default arguments for this function if the default arguments are used for `ebase()`.

```{r, fig.height = 3, fig.width = 9}
prior_plot()
```

95% credible intervals for `a`, `Rt_vol`, and `b` are also returned with the output from `ebase()` in the corresponding columns `alo`, `ahi`, `blo`, `bhi`, `Rt_vollo`, and `Rt_volhi`, for the 2.5th and 97.5th percentile estimates for each parameter, respectively. These values indicate the interval within which there is a 95% probability that the true parameter is in this range and is a representation of the posterior distributions for each parameter. Note that all values for these parameters are repeated across rows, although only one estimate for each is returned based on the number of days defined by `ndays`.

The credible intervals can be plotted with the `credible_plot()` function.

```{r, fig.height = 6, fig.width = 7}
```{r, fig.height = 7, fig.width = 7}
credible_plot(res)
```

Expand Down Expand Up @@ -194,16 +208,15 @@ interp_plot(dat2, H = 1.85, interval = 900, param = 'DO_sat')

### Changing priors

The prior distributions for the $a$, $r$, and $b$ parameters are defined in the model file included with the package as normal distributions with mean and standard deviations provided by the `aprior`, `rprior`, and `bprior` arguments in `ebase()`. The location of the model file can be viewed as follows.

```{r, eval = F}
system.file('ebase_model.txt', package = 'EBASE')
```

The default values for the priors were chosen based on the ability of EBASE to reproduce known parameters from a forward metabolism model. An additional constraint is that all the prior distributions are truncated to be positive values as required by the core metabolism equation above. Units for each parameter are (mmol/m3/d)(W/m2) for $a$, mmol/m3/d for $r$, and (cm/hr)/(m2/s2) for $b$.
If the default values are changed for \code{\link{ebase}}, the `prior_plot()` function can be used to assess how changing characteristics of the prior distributions could influence the resulting parameter estimates and their posterior distributions (e.g., as shown with \code{\link{credible_plot}}.

Here, the prior distribution for the $b$ parameter is changed to have a mean of 0.4 and standard deviation of 1.

```{r, fig.height = 3, fig.width = 9}
prior_plot(bprior = c(0.4, 1))
```

The same change to the prior distribution for the $b$ parameter is applied to `ebase()`
```{r, fig.height = 3, fig.width = 9}
cl <- makeCluster(2)
registerDoParallel(cl)
Expand All @@ -215,4 +228,10 @@ stopCluster(cl)
ebase_plot(res, instantaneous = TRUE)
```

The `credible_plot()` function can be used to assess how changing the prior distributions has an influence on the posterior distributions of the parameters.

```{r, fig.height = 7, fig.width = 7}
credible_plot(res)
```

# References

0 comments on commit 8c25c6c

Please sign in to comment.