Rename at arguments into by (#649)

* Rename `at` arguments into `by` * update test * lintr * fix * fix broken URLs * spelling
easystats · May 20, 2024 · 0d91cc6 · 0d91cc6
1 parent bce014b
commit 0d91cc6
Show file tree

Hide file tree

Showing 10 changed files with 109 additions and 87 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: bayestestR
 Title: Understand and Describe Bayesian Models and Posterior Distributions
-Version: 0.13.2
+Version: 0.13.2.1
 Authors@R:
     c(person(given = "Dominique",
              family = "Makowski",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,13 @@
+# bayestestR 0.13.3
+
+## Breaking Changes
+
+* Arguments named `group`, `at`, `group_by` and `split_by` will be deprecated
+  in future releases of _easystats_ packages. Please use `by` instead. This
+  affects following functions in *bayestestR*:
+
+  * `estimate_density()`
+
 # bayestestR 0.13.2
 
 ## Breaking Changes

diff --git a/R/area_under_curve.R b/R/area_under_curve.R
@@ -42,9 +42,9 @@ area_under_curve <- function(x, y, method = c("trapezoid", "step", "spline"), ..
   y <- y[idx]
 
   switch(match.arg(arg = method, choices = c("trapezoid", "step", "spline")),
-    "trapezoid" = sum((rowMeans(cbind(y[-length(y)], y[-1]))) * (x[-1] - x[-length(x)])),
-    "step" = sum(y[-length(y)] * (x[-1] - x[-length(x)])),
-    "spline" = stats::integrate(stats::splinefun(x, y, method = "natural"), lower = min(x), upper = max(x))$value
+    trapezoid = sum((rowMeans(cbind(y[-length(y)], y[-1]))) * (x[-1] - x[-length(x)])),
+    step = sum(y[-length(y)] * (x[-1] - x[-length(x)])),
+    spline = stats::integrate(stats::splinefun(x, y, method = "natural"), lower = min(x), upper = max(x))$value
   )
 }
 

diff --git a/R/check_prior.R b/R/check_prior.R
@@ -2,7 +2,7 @@
 #'
 #' Performs a simple test to check whether the prior is informative to the
 #' posterior. This idea, and the accompanying heuristics, were discussed in
-#' [this blogpost](https://statmodeling.stat.columbia.edu/2019/08/10/).
+#' _Gelman et al. 2017_.
 #'
 #' @param method Can be `"gelman"` or `"lakeland"`. For the
 #'   `"gelman"` method, if the SD of the posterior is more than 0.1 times
@@ -20,28 +20,30 @@
 #'   or `"not determinable"` if the prior distribution could not be
 #'   determined).
 #'
-#' @examples
+#' @examplesIf require("rstanarm") && require("see")
 #' \donttest{
 #' library(bayestestR)
-#' if (require("rstanarm")) {
-#'   model <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1, refresh = 0)
-#'   check_prior(model, method = "gelman")
-#'   check_prior(model, method = "lakeland")
+#' model <- rstanarm::stan_glm(mpg ~ wt + am, data = mtcars, chains = 1, refresh = 0)
+#' check_prior(model, method = "gelman")
+#' check_prior(model, method = "lakeland")
 #'
-#'   # An extreme example where both methods diverge:
-#'   model <- stan_glm(mpg ~ wt,
-#'     data = mtcars[1:3, ],
-#'     prior = normal(-3.3, 1, FALSE),
-#'     prior_intercept = normal(0, 1000, FALSE),
-#'     refresh = 0
-#'   )
-#'   check_prior(model, method = "gelman")
-#'   check_prior(model, method = "lakeland")
-#'   # can provide visual confirmation to the Lakeland method
-#'   plot(si(model, verbose = FALSE))
+#' # An extreme example where both methods diverge:
+#' model <- rstanarm::stan_glm(mpg ~ wt,
+#'   data = mtcars[1:3, ],
+#'   prior = normal(-3.3, 1, FALSE),
+#'   prior_intercept = normal(0, 1000, FALSE),
+#'   refresh = 0
+#' )
+#' check_prior(model, method = "gelman")
+#' check_prior(model, method = "lakeland")
+#' # can provide visual confirmation to the Lakeland method
+#' plot(si(model, verbose = FALSE))
 #' }
-#' }
-#' @references https://statmodeling.stat.columbia.edu/2019/08/10/
+#' @references
+#' Gelman, A., Simpson, D., and Betancourt, M. (2017). The Prior Can Often Only
+#' Be Understood in the Context of the Likelihood. Entropy, 19(10), 555.
+#' \doi{10.3390/e19100555}
+#'
 #' @export
 check_prior <- function(model, method = "gelman", simulate_priors = TRUE, ...) {
   UseMethod("check_prior")

diff --git a/R/estimate_density.R b/R/estimate_density.R
@@ -21,10 +21,10 @@
 #' @param select Character vector of column names. If NULL (the default), all
 #' numeric variables will be selected. Other arguments from [datawizard::find_columns()]
 #' (such as `exclude`) can also be used.
-#' @param at Optional character vector. If not `NULL` and input is a data frame,
-#' density estimation is performed for each group (subsets) indicated by `at`.
+#' @param by Optional character vector. If not `NULL` and input is a data frame,
+#' density estimation is performed for each group (subsets) indicated by `by`.
 #' See examples.
-#' @param group_by Deprecated in favour of `at`.
+#' @param at Deprecated in favour of `by`.
 #'
 #' @note There is also a [`plot()`-method](https://easystats.github.io/see/articles/bayestestR.html) implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}.
 #'
@@ -70,8 +70,8 @@
 #' head(estimate_density(iris, select = "Sepal.Width"))
 #'
 #' # Grouped data
-#' head(estimate_density(iris, at = "Species"))
-#' head(estimate_density(iris$Petal.Width, at = iris$Species))
+#' head(estimate_density(iris, by = "Species"))
+#' head(estimate_density(iris$Petal.Width, by = iris$Species))
 #' \donttest{
 #' # rstanarm models
 #' # -----------------------------------------------
@@ -172,34 +172,34 @@ estimate_density.numeric <- function(x,
                                      extend_scale = 0.1,
                                      bw = "SJ",
                                      ci = NULL,
+                                     by = NULL,
                                      at = NULL,
-                                     group_by = NULL,
                                      ...) {
   # TODO remove deprecation warning
   # Sanity
-  if (!is.null(group_by)) {
+  if (!is.null(at)) {
     insight::format_warning(
-      "The `group_by` argument is deprecated and might be removed in a future update. Please replace by `at`."
+      "The `at` argument is deprecated and might be removed in a future update. Please replace by `by`."
     )
-    at <- group_by
+    by <- at
   }
 
-  if (!is.null(at)) {
-    if (length(at) == 1) {
+  if (!is.null(by)) {
+    if (length(by) == 1) {
       insight::format_error(paste0(
-        "`at` must be either the name of a group column if a data frame is entered as input,",
+        "`by` must be either the name of a group column if a data frame is entered as input,",
         " or in this case (where a single vector was passed) a vector of same length."
       ))
     }
     out <- estimate_density(
-      data.frame(V1 = x, Group = at, stringsAsFactors = FALSE),
+      data.frame(V1 = x, Group = by, stringsAsFactors = FALSE),
       method = method,
       precision = precision,
       extend = extend,
       extend_scale = extend_scale,
       bw = bw,
       ci = ci,
-      at = "Group",
+      by = "Group",
       ...
     )
     out$Parameter <- NULL
@@ -230,19 +230,19 @@ estimate_density.data.frame <- function(x,
                                         bw = "SJ",
                                         ci = NULL,
                                         select = NULL,
+                                        by = NULL,
                                         at = NULL,
-                                        group_by = NULL,
                                         ...) {
   # Sanity
-  if (!is.null(group_by)) {
+  if (!is.null(at)) {
     insight::format_warning(paste0(
-      "The `group_by` argument is deprecated and might be removed in a future update.",
-      " Please replace by `at`."
+      "The `at` argument is deprecated and might be removed in a future update.",
+      " Please replace by `by`."
     ))
-    at <- group_by
+    by <- at
   }
 
-  if (is.null(at)) {
+  if (is.null(by)) {
     # No grouping -------------------
     out <- .estimate_density_df(
       x = x,
@@ -256,9 +256,9 @@ estimate_density.data.frame <- function(x,
       ...
     )
   } else {
-    # Deal with at- grouping --------
+    # Deal with by- grouping --------
 
-    groups <- insight::get_datagrid(x[, at, drop = FALSE], by = at) # Get combinations
+    groups <- insight::get_datagrid(x[, by, drop = FALSE], by = by) # Get combinations
     out <- data.frame()
     for (row in seq_len(nrow(groups))) {
       subdata <- datawizard::data_match(x, groups[row, , drop = FALSE])
@@ -292,9 +292,17 @@ estimate_density.draws <- function(x,
                                    bw = "SJ",
                                    ci = NULL,
                                    select = NULL,
+                                   by = NULL,
                                    at = NULL,
-                                   group_by = NULL,
                                    ...) {
+  if (!is.null(at)) {
+    insight::format_warning(paste0(
+      "The `at` argument is deprecated and might be removed in a future update.",
+      " Please replace by `by`."
+    ))
+    by <- at
+  }
+
   estimate_density(
     .posterior_draws_to_df(x),
     method = method,
@@ -303,8 +311,7 @@ estimate_density.draws <- function(x,
     extend_scale = extend_scale,
     bw = bw,
     select = select,
-    at = at,
-    group_by = group_by
+    by = by
   )
 }
 
@@ -577,7 +584,7 @@ as.data.frame.density <- function(x, ...) {
 
   for (i in names(out)) {
     if (nrow(out[[i]]) == 0) {
-      insight::format_warning(paste0("'", i, "', or one of its 'at' groups, is empty and has no density information."))
+      insight::format_warning(paste0("`", i, "`, or one of its groups specified in `by`, is empty and has no density information."))
     } else {
       out[[i]]$Parameter <- i
     }

diff --git a/R/simulate_simpson.R b/R/simulate_simpson.R
@@ -31,14 +31,14 @@ simulate_simpson <- function(n = 100,
     insight::format_error("The number of observations `n` should be larger than 3.")
   }
 
-  data <- data.frame()
+  out <- data.frame()
   for (i in 1:groups) {
     dat <- simulate_correlation(n = n, r = r)
     dat$V1 <- dat$V1 + difference * i # (i * -sign(r))
     dat$V2 <- dat$V2 + difference * (i * -sign(r))
     dat$Group <- sprintf(paste0(group_prefix, "%0", nchar(trunc(abs(groups))), "d"), i)
-    data <- rbind(data, dat)
+    out <- rbind(out, dat)
   }
 
-  data
+  out
 }
diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -12,6 +12,7 @@ Baws
 BayesFactor
 Bayesfactor
 Bergh
+Betancourt
 Bridgesampling
 CMD
 CRC

diff --git a/man/check_prior.Rd b/man/check_prior.Rd
diff --git a/man/estimate_density.Rd b/man/estimate_density.Rd
diff --git a/tests/testthat/test-estimate_density.R b/tests/testthat/test-estimate_density.R
@@ -17,20 +17,20 @@ test_that("estimate_density", {
   expect_equal(mean(density_kernel$y - density_mixture$y), 0, tolerance = 0.1)
 
   x <- iris
-  x$Fac <- rep(c("A", "B"), length.out = 150)
+  x$Fac <- rep_len(c("A", "B"), 150)
 
   rez <- estimate_density(x, select = "Sepal.Length")
-  expect_equal(dim(rez), c(1024, 3))
+  expect_identical(dim(rez), c(1024L, 3L))
 
   rez <- estimate_density(x, select = c("Sepal.Length", "Petal.Length"))
-  expect_equal(dim(rez), c(2048, 3))
+  expect_identical(dim(rez), c(2048L, 3L))
 
-  rez <- estimate_density(x, select = "Sepal.Length", at = "Species")
-  expect_equal(dim(rez), c(1024 * 3, 4))
+  rez <- estimate_density(x, select = "Sepal.Length", by = "Species")
+  expect_identical(dim(rez), as.integer(c(1024 * 3, 4)))
 
-  rez <- estimate_density(x, select = c("Sepal.Length", "Petal.Length"), at = "Species")
-  expect_equal(dim(rez), c(2048 * 3, 4))
+  rez <- estimate_density(x, select = c("Sepal.Length", "Petal.Length"), by = "Species")
+  expect_identical(dim(rez), as.integer(c(2048 * 3, 4)))
 
-  rez <- estimate_density(x, select = "Sepal.Length", at = c("Species", "Fac"), method = "KernSmooth")
-  expect_equal(dim(rez), c(1024 * 3 * 2, 5))
+  rez <- estimate_density(x, select = "Sepal.Length", by = c("Species", "Fac"), method = "KernSmooth")
+  expect_identical(dim(rez), as.integer(c(1024 * 3 * 2, 5)))
 })