Merge branch 'issue-222' into main: closes #222

gavinsimpson · May 16, 2023 · ec064d4 · ec064d4
2 parents e61499c + 6e151e7
commit ec064d4
Show file tree

Hide file tree

Showing 12 changed files with 133 additions and 31 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: gratia
-Version: 0.8.1.31
-Date: 2023-04-06
+Version: 0.8.1.32
+Date: 2023-05-16
 Title: Graceful 'ggplot'-Based Graphics and Other Functions for GAMs Fitted Using 'mgcv'
 Authors@R: c(person(given = "Gavin L.", family = "Simpson",
                     email = "ucfagls@gmail.com",

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,4 @@
-# gratia 0.8.1.31
+# gratia 0.8.1.32
 
 ## User visible changes
 
@@ -171,6 +171,10 @@
 * `response_derivatives()` was incorrectly using `.data` with *tidyselect*
   selectors.
 
+* `typical_values()` could not handle logical variables in a GAM fit as mgcv
+  stores these as numerics in the `var.summary`. This affected `evenly()` and
+  `data_slice()`. #222
+
 # gratia 0.8.1
 
 ## User visible changes

diff --git a/R/data-slice.R b/R/data-slice.R
@@ -48,6 +48,8 @@
 #'   needed to fit the model. If `NULL`, the default, the data used to fit the
 #'   model will be recovered using `model.frame`. User-supplied expressions
 #'   passed in `...` will be evaluated in `data`.
+#' @param envir the environment within which to recreate the data used to fit
+#'   `object`.
 #'
 #' @export
 #' @rdname data_slice
@@ -75,8 +77,10 @@
 #' # or provide an expression (function call) which will be evaluated in the
 #' # data frame passed to `data` or `model.frame(object)`
 #' ds <- data_slice(m1, x2 = evenly(x2, n = 50), x1 = mean(x1))
-`data_slice.gam` <- function(object, ..., data = NULL) {
+`data_slice.gam` <- function(object, ..., data = NULL,
+                             envir = environment(formula(object))) {
     # prep data
+    odata <- data
     data <- data_slice_data(object, data = data)
 
     # deal with ...
@@ -96,7 +100,7 @@
     # typical values, only needed ones that aren't
     need_tv <- setdiff(vars, names(slice_vars))
     if (length(need_tv) > 0L) {
-        tv <- typical_values(object)
+        tv <- typical_values(object, data = odata, envir = envir)
         slice_vars <- append(slice_vars, tv[need_tv])
     }
 
@@ -283,10 +287,17 @@
 #' @rdname typical_values
 #' @param vars terms to include or exclude from the returned object. Uses
 #'   tidyselect principles.
+#' @param envir the environment within which to recreate the data used to fit
+#'   `object`.
+#' @param data an optional data frame of data used to fit the mdoel if
+#'   reconstruction of the data from the model doesn't work.
+#' 
 #' @export
 #' @importFrom rlang enquo
 #' @importFrom tidyselect eval_select
-`typical_values.gam` <- function(object, vars = everything(), ...) {
+#' @importFrom stats model.frame formula
+`typical_values.gam` <- function(object, vars = everything(),
+    envir = environment(formula(object)), data = NULL, ...) {
     # extract the summary from the fitted GAM
     # summ is a named list
     summ <- object[["var.summary"]]
@@ -299,6 +310,28 @@
     # for numeric variables summ is a vector with 3 elements, we want element 2
     # which contains the value of the observation closest to the median
     # probably need to handle matrix covariates here separately from numerics
+    # logical values get stored as numeric in the summary
+    # dc <- data_class(summ) # mgcv doesn't store logicals as logicals
+    # so we need to extract the data classes ourselves
+    # try to recover the data
+    mf <- model.frame(object)
+    if (is.null(data)) {
+        data <- eval(object$call$data, envir)
+    }
+    if (is.null(data)) {
+        data <- mf
+    }
+    data <- data[names(summ)] # take only vars mgcv thinks we need
+    dc <- data_class(data)
+
+    # if any logicals extract them as per numeric (2nd value) and convert to
+    # logical. do this before extracting the numerics
+    is_log <- dc == "logical"
+    if (any(is_log)) {
+        summ[is_log] <- lapply(summ[is_log], \(x) as.logical(x[2]))
+    }
+
+    # now process the numerics
     dc <- data_class(summ)
     i <-  dc == "numeric" & lengths(summ) == 3L
     summ[i] <- lapply(summ[i], `[`, 2)
@@ -382,12 +415,19 @@
     UseMethod("data_combos")
 }
 
+#' @param envir the environment within which to recreate the data used to fit
+#'   `object`.
+#' @param data an optional data frame of data used to fit the mdoel if
+#'   reconstruction of the data from the model doesn't work.
+#'
 #' @inheritParams factor_combos
 #' @export
 #' @rdname data_combos
 `data_combos.gam` <- function(object, vars = everything(),
-                              complete = TRUE, ...) {
-    tv <- typical_values(object)
+                              complete = TRUE,
+                              envir = environment(formula(object)),
+                              data = NULL, ...) {
+    tv <- typical_values(object, envir = envir, data = data)
     is_fac <- vapply(tv, is.factor, logical(1L))
     if (any(is_fac)) { # drop factor from typical values
         tv <- tv[, !is_fac]

diff --git a/R/parametric-effects.R b/R/parametric-effects.R
@@ -70,7 +70,7 @@
     }
 
     # data combinations to get all parametric terms inc factor level combos
-    tbl <- data_combos(object, complete = FALSE)
+    tbl <- data_combos(object, complete = FALSE, envir = envir, data = data)
     # predict model contributions for the parametric terms, using only
     # the factor combos in the data and typical values of all other terms
     # and exclude the effects of smooths as they don't change anything in

diff --git a/man/data_combos.Rd b/man/data_combos.Rd
diff --git a/man/data_slice.Rd b/man/data_slice.Rd
diff --git a/man/typical_values.Rd b/man/typical_values.Rd
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
@@ -16,6 +16,7 @@ library("nlme")
 
 ## Fit models
 quick_eg1 <- data_sim("eg1", n = 200, seed = 1)
+quick_eg1_off <- quick_eg1 |> mutate(off = 2)
 su_eg1 <- data_sim("eg1", n = 1000,  dist = "normal", scale = 2, seed = 1)
 su_eg2 <- data_sim("eg2", n = 2000, dist = "normal", scale = 0.5, seed = 42)
 su_eg3 <- data_sim("eg3", n = 400, seed = 32)
@@ -141,7 +142,7 @@ su_gamm_univar_4 <- gamm(y ~ s(x0) + s(x1) + s(x2) + s(x3),
 m_1_smooth <- gam(y ~ s(x0), data = quick_eg1, method = "REML")
 
 m_1_smooth_offset <- gam(y ~ s(x0) + offset(log(off)),
-  data = quick_eg1 |> mutate(off = 2), method = "REML")
+  data = quick_eg1_off, method = "REML")
 
 m_gam <- su_m_univar_4
 
@@ -346,3 +347,11 @@ cens_df$y_cens <- with(cens_df, cbind(y, censored))
 
 m_censor <- bam(y_cens ~ s(x0) + s(x1) + s(x2) + s(x3), data = cens_df,
   family = cnorm(), method = "fREML")
+
+# examples for logical variables - examples if from mgcViz::pterms
+logi_df <- data_sim("eg1", n = 600, dist = "normal", scale = 20, seed = 3) |>
+    mutate(fac = as.factor(sample(c("A1", "A2", "A3"), 600, replace = TRUE)),
+           logi = as.logical(sample(c(TRUE, FALSE), 600, replace = TRUE)))
+m_logical <- gam(y ~ x0 + x1 + I(x1^2) + s(x2, bs = "cr", k = 12) + fac +
+                   x3:fac + I(x1*x2) + logi,
+                 data = logi_df)
diff --git a/tests/testthat/test-add-functions.R b/tests/testthat/test-add-functions.R
@@ -103,7 +103,8 @@ test_that("add_confint works for smooth_estimates", {
 })
 
 test_that("add_confint.default fails is no est and se", {
-    expect_error(add_confint(typical_values(m_gam)),
+    expect_error(add_confint(typical_values(m_gam,
+    data = su_eg1, envir = teardown_env())),
                  "'object' does not contain one or both of 'est' or 'se'.")
 })
 

diff --git a/tests/testthat/test-data-slice.R b/tests/testthat/test-data-slice.R
@@ -7,15 +7,17 @@ library("mgcv")
 
 test_that("data_slice works for a GAM", {
     expect_silent(ds <- data_slice(su_m_quick_eg1,
-                                   x1 = evenly(x1, n = 50),
-                                   x2 = evenly(x2, n = 50)))
+        x1 = evenly(x1, n = 50),
+        x2 = evenly(x2, n = 50), data = quick_eg1, envir = teardown_env()))
     expect_s3_class(ds, "tbl_df")
     expect_named(ds, c("x1", "x2", "x0", "x3"))
     expect_message(data_slice(su_m_quick_eg1,
-                              x1 = evenly(x1, n = 50), var2 = "foo"),
+                              x1 = evenly(x1, n = 50), var2 = "foo",
+                              data = quick_eg1, envir = teardown_env()),
                    "Some specified variable\\(s\\) not used in model")
     expect_message(data_slice(su_m_quick_eg1,
-                              x1 = evenly(x1, n = 50), var2 = "foo"),
+                              x1 = evenly(x1, n = 50), var2 = "foo",
+                              data = quick_eg1, envir = teardown_env()),
                    "var2")
 })
 
@@ -74,8 +76,9 @@ test_that("process_slice_var returns NULL when `x` is NULL", {
 
 test_that("data_slice works for a GAM with factor by", {
     expect_silent(ds <- data_slice(su_m_factor_by,
-                                   x2 = evenly(x2),
-                                   fac = evenly(fac)))
+        x2 = evenly(x2),
+        fac = evenly(fac),
+        data = su_eg4, envir = teardown_env()))
     expect_s3_class(ds, "tbl_df")
     expect_named(ds, c("x2", "fac", "x0"))
 })
@@ -107,22 +110,25 @@ test_that("value_closest_to_median works with a factor", {
 
 # typical_values()
 test_that("typical_values works with a simple GAM", {
-    expect_silent(tv <- typical_values(su_m_quick_eg1))
+    expect_silent(tv <- typical_values(su_m_quick_eg1,
+        data = quick_eg1, envir = teardown_env()))
     expect_s3_class(tv, "tbl_df")
     expect_identical(nrow(tv), 1L)
     expect_identical(ncol(tv), 4L)
 })
 
 test_that("typical_values works when including terms", {
-    expect_silent(tv <- typical_values(su_m_quick_eg1, vars = c(x0, x2)))
+    expect_silent(tv <- typical_values(su_m_quick_eg1, vars = c(x0, x2),
+        data = quick_eg1, envir = teardown_env()))
     expect_s3_class(tv, "tbl_df")
     expect_identical(nrow(tv), 1L)
     expect_identical(ncol(tv), 2L)
     expect_identical(names(tv), c("x0","x2"))
 })
 
 test_that("typical_values works when excluding terms", {
-    expect_silent(tv <- typical_values(su_m_quick_eg1, vars = !c(x0, x2)))
+    expect_silent(tv <- typical_values(su_m_quick_eg1, vars = !c(x0, x2),
+        data = quick_eg1, envir = teardown_env()))
     expect_s3_class(tv, "tbl_df")
     expect_identical(nrow(tv), 1L)
     expect_identical(ncol(tv), 2L)
@@ -161,46 +167,52 @@ test_that("factor_combos works when there are no factor terms", {
 
 # data_combos()
 test_that("data_combos works with a GAM", {
-    expect_silent(dc <- data_combos(m_para_sm))
+    expect_silent(dc <- data_combos(m_para_sm,
+        envir = teardown_env(), data = df_2_fac))
     expect_s3_class(dc, "tbl_df")
     expect_identical(nrow(dc), 12L)
     expect_identical(ncol(dc), 5L)
     expect_named(dc, c("fac", "ff", "x0", "x1", "x2"))
 })
 
 test_that("data_combos works when including terms", {
-    expect_silent(dc <- data_combos(m_para_sm, vars = c(fac, x0)))
+    expect_silent(dc <- data_combos(m_para_sm, vars = c(fac, x0),
+        data = df_2_fac, envir = teardown_env()))
     expect_s3_class(dc, "tbl_df")
     expect_identical(nrow(dc), 12L)
     expect_identical(ncol(dc), 2L)
     expect_named(dc, c("fac", "x0"))
 })
 
 test_that("data_combos works when exluding terms", {
-    expect_silent(dc <- data_combos(m_para_sm, vars = !c(fac, x0)))
+    expect_silent(dc <- data_combos(m_para_sm, vars = !c(fac, x0),
+        data = df_2_fac, envir = teardown_env()))
     expect_s3_class(dc, "tbl_df")
     expect_identical(nrow(dc), 12L)
     expect_identical(ncol(dc), 3L)
     expect_named(dc, c("ff", "x1", "x2"))
 })
 
 test_that("data_combos works when there are no factor terms", {
-    expect_silent(dc <- data_combos(m_gam))
+    expect_silent(dc <- data_combos(m_gam,
+        data = su_eg1, envir = teardown_env()))
     expect_identical(nrow(dc), 1L)
     expect_identical(ncol(dc), 4L)
     expect_named(dc, c("x0", "x1", "x2", "x3"))
 })
 
 # Test data_slice with models that have an offset(s) - # 189
 test_that("data_slice with no args works with models with an offset", {
-    expect_silent(ds <- data_slice(m_1_smooth_offset))
+    expect_silent(ds <- data_slice(m_1_smooth_offset,
+        data = quick_eg1_off, envir = teardown_env()))
     expect_identical(nrow(ds), 1L)
     expect_identical(ncol(ds), 2L)
     expect_identical(ds$off, 2)
 })
 
 test_that("data_slice with works with models with an offset", {
-    expect_silent(ds <- data_slice(m_1_smooth_offset, off = 1))
+    expect_silent(ds <- data_slice(m_1_smooth_offset, off = 1,
+        data = quick_eg1_off, envir = teardown_env()))
     expect_identical(nrow(ds), 1L)
     expect_identical(ncol(ds), 2L)
     expect_identical(ds$off, 1)
@@ -229,3 +241,9 @@ test_that("data_slice works for a data frame", {
     expect_snapshot(ds, cran = FALSE)
     expect_identical(nrow(ds), 625L)
 })
+
+# make sure #222 remains fixed
+test_that("issue 222 is fixed", {
+    expect_silent(tv <- typical_values(m_logical, envir = teardown_env(),
+        data = logi_df))
+})
diff --git a/tests/testthat/test-lp-matrix.R b/tests/testthat/test-lp-matrix.R
@@ -9,7 +9,8 @@ test_that("lp_matrix works for a GAM", {
     expect_s3_class(xp, "lp_matrix")
     expect_s3_class(xp, "matrix")
 
-    expect_silent(ds <- data_slice(m_gam, x2 = evenly(x2, n = 50)))
+    expect_silent(ds <- data_slice(m_gam, x2 = evenly(x2, n = 50),
+        data = su_eg1, envir = teardown_env()))
     expect_silent(xp <- lp_matrix(m_gam, data = ds))
     expect_s3_class(xp, "lp_matrix")
     expect_s3_class(xp, "matrix")