Skip to content

Commit

Permalink
Merge branch 'issue-222' into main: closes #222
Browse files Browse the repository at this point in the history
  • Loading branch information
gavinsimpson committed May 16, 2023
2 parents e61499c + 6e151e7 commit ec064d4
Show file tree
Hide file tree
Showing 12 changed files with 133 additions and 31 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: gratia
Version: 0.8.1.31
Date: 2023-04-06
Version: 0.8.1.32
Date: 2023-05-16
Title: Graceful 'ggplot'-Based Graphics and Other Functions for GAMs Fitted Using 'mgcv'
Authors@R: c(person(given = "Gavin L.", family = "Simpson",
email = "ucfagls@gmail.com",
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
@@ -1,4 +1,4 @@
# gratia 0.8.1.31
# gratia 0.8.1.32

## User visible changes

Expand Down Expand Up @@ -171,6 +171,10 @@
* `response_derivatives()` was incorrectly using `.data` with *tidyselect*
selectors.

* `typical_values()` could not handle logical variables in a GAM fit as mgcv
stores these as numerics in the `var.summary`. This affected `evenly()` and
`data_slice()`. #222

# gratia 0.8.1

## User visible changes
Expand Down
50 changes: 45 additions & 5 deletions R/data-slice.R
Expand Up @@ -48,6 +48,8 @@
#' needed to fit the model. If `NULL`, the default, the data used to fit the
#' model will be recovered using `model.frame`. User-supplied expressions
#' passed in `...` will be evaluated in `data`.
#' @param envir the environment within which to recreate the data used to fit
#' `object`.
#'
#' @export
#' @rdname data_slice
Expand Down Expand Up @@ -75,8 +77,10 @@
#' # or provide an expression (function call) which will be evaluated in the
#' # data frame passed to `data` or `model.frame(object)`
#' ds <- data_slice(m1, x2 = evenly(x2, n = 50), x1 = mean(x1))
`data_slice.gam` <- function(object, ..., data = NULL) {
`data_slice.gam` <- function(object, ..., data = NULL,
envir = environment(formula(object))) {
# prep data
odata <- data
data <- data_slice_data(object, data = data)

# deal with ...
Expand All @@ -96,7 +100,7 @@
# typical values, only needed ones that aren't
need_tv <- setdiff(vars, names(slice_vars))
if (length(need_tv) > 0L) {
tv <- typical_values(object)
tv <- typical_values(object, data = odata, envir = envir)
slice_vars <- append(slice_vars, tv[need_tv])
}

Expand Down Expand Up @@ -283,10 +287,17 @@
#' @rdname typical_values
#' @param vars terms to include or exclude from the returned object. Uses
#' tidyselect principles.
#' @param envir the environment within which to recreate the data used to fit
#' `object`.
#' @param data an optional data frame of data used to fit the mdoel if
#' reconstruction of the data from the model doesn't work.
#'
#' @export
#' @importFrom rlang enquo
#' @importFrom tidyselect eval_select
`typical_values.gam` <- function(object, vars = everything(), ...) {
#' @importFrom stats model.frame formula
`typical_values.gam` <- function(object, vars = everything(),
envir = environment(formula(object)), data = NULL, ...) {
# extract the summary from the fitted GAM
# summ is a named list
summ <- object[["var.summary"]]
Expand All @@ -299,6 +310,28 @@
# for numeric variables summ is a vector with 3 elements, we want element 2
# which contains the value of the observation closest to the median
# probably need to handle matrix covariates here separately from numerics
# logical values get stored as numeric in the summary
# dc <- data_class(summ) # mgcv doesn't store logicals as logicals
# so we need to extract the data classes ourselves
# try to recover the data
mf <- model.frame(object)
if (is.null(data)) {
data <- eval(object$call$data, envir)
}
if (is.null(data)) {
data <- mf
}
data <- data[names(summ)] # take only vars mgcv thinks we need
dc <- data_class(data)

# if any logicals extract them as per numeric (2nd value) and convert to
# logical. do this before extracting the numerics
is_log <- dc == "logical"
if (any(is_log)) {
summ[is_log] <- lapply(summ[is_log], \(x) as.logical(x[2]))
}

# now process the numerics
dc <- data_class(summ)
i <- dc == "numeric" & lengths(summ) == 3L
summ[i] <- lapply(summ[i], `[`, 2)
Expand Down Expand Up @@ -382,12 +415,19 @@
UseMethod("data_combos")
}

#' @param envir the environment within which to recreate the data used to fit
#' `object`.
#' @param data an optional data frame of data used to fit the mdoel if
#' reconstruction of the data from the model doesn't work.
#'
#' @inheritParams factor_combos
#' @export
#' @rdname data_combos
`data_combos.gam` <- function(object, vars = everything(),
complete = TRUE, ...) {
tv <- typical_values(object)
complete = TRUE,
envir = environment(formula(object)),
data = NULL, ...) {
tv <- typical_values(object, envir = envir, data = data)
is_fac <- vapply(tv, is.factor, logical(1L))
if (any(is_fac)) { # drop factor from typical values
tv <- tv[, !is_fac]
Expand Down
2 changes: 1 addition & 1 deletion R/parametric-effects.R
Expand Up @@ -70,7 +70,7 @@
}

# data combinations to get all parametric terms inc factor level combos
tbl <- data_combos(object, complete = FALSE)
tbl <- data_combos(object, complete = FALSE, envir = envir, data = data)
# predict model contributions for the parametric terms, using only
# the factor combos in the data and typical values of all other terms
# and exclude the effects of smooths as they don't change anything in
Expand Down
15 changes: 14 additions & 1 deletion man/data_combos.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/data_slice.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 13 additions & 1 deletion man/typical_values.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion tests/testthat/setup.R
Expand Up @@ -16,6 +16,7 @@ library("nlme")

## Fit models
quick_eg1 <- data_sim("eg1", n = 200, seed = 1)
quick_eg1_off <- quick_eg1 |> mutate(off = 2)
su_eg1 <- data_sim("eg1", n = 1000, dist = "normal", scale = 2, seed = 1)
su_eg2 <- data_sim("eg2", n = 2000, dist = "normal", scale = 0.5, seed = 42)
su_eg3 <- data_sim("eg3", n = 400, seed = 32)
Expand Down Expand Up @@ -141,7 +142,7 @@ su_gamm_univar_4 <- gamm(y ~ s(x0) + s(x1) + s(x2) + s(x3),
m_1_smooth <- gam(y ~ s(x0), data = quick_eg1, method = "REML")

m_1_smooth_offset <- gam(y ~ s(x0) + offset(log(off)),
data = quick_eg1 |> mutate(off = 2), method = "REML")
data = quick_eg1_off, method = "REML")

m_gam <- su_m_univar_4

Expand Down Expand Up @@ -346,3 +347,11 @@ cens_df$y_cens <- with(cens_df, cbind(y, censored))

m_censor <- bam(y_cens ~ s(x0) + s(x1) + s(x2) + s(x3), data = cens_df,
family = cnorm(), method = "fREML")

# examples for logical variables - examples if from mgcViz::pterms
logi_df <- data_sim("eg1", n = 600, dist = "normal", scale = 20, seed = 3) |>
mutate(fac = as.factor(sample(c("A1", "A2", "A3"), 600, replace = TRUE)),
logi = as.logical(sample(c(TRUE, FALSE), 600, replace = TRUE)))
m_logical <- gam(y ~ x0 + x1 + I(x1^2) + s(x2, bs = "cr", k = 12) + fac +
x3:fac + I(x1*x2) + logi,
data = logi_df)
3 changes: 2 additions & 1 deletion tests/testthat/test-add-functions.R
Expand Up @@ -103,7 +103,8 @@ test_that("add_confint works for smooth_estimates", {
})

test_that("add_confint.default fails is no est and se", {
expect_error(add_confint(typical_values(m_gam)),
expect_error(add_confint(typical_values(m_gam,
data = su_eg1, envir = teardown_env())),
"'object' does not contain one or both of 'est' or 'se'.")
})

Expand Down
48 changes: 33 additions & 15 deletions tests/testthat/test-data-slice.R
Expand Up @@ -7,15 +7,17 @@ library("mgcv")

test_that("data_slice works for a GAM", {
expect_silent(ds <- data_slice(su_m_quick_eg1,
x1 = evenly(x1, n = 50),
x2 = evenly(x2, n = 50)))
x1 = evenly(x1, n = 50),
x2 = evenly(x2, n = 50), data = quick_eg1, envir = teardown_env()))
expect_s3_class(ds, "tbl_df")
expect_named(ds, c("x1", "x2", "x0", "x3"))
expect_message(data_slice(su_m_quick_eg1,
x1 = evenly(x1, n = 50), var2 = "foo"),
x1 = evenly(x1, n = 50), var2 = "foo",
data = quick_eg1, envir = teardown_env()),
"Some specified variable\\(s\\) not used in model")
expect_message(data_slice(su_m_quick_eg1,
x1 = evenly(x1, n = 50), var2 = "foo"),
x1 = evenly(x1, n = 50), var2 = "foo",
data = quick_eg1, envir = teardown_env()),
"var2")
})

Expand Down Expand Up @@ -74,8 +76,9 @@ test_that("process_slice_var returns NULL when `x` is NULL", {

test_that("data_slice works for a GAM with factor by", {
expect_silent(ds <- data_slice(su_m_factor_by,
x2 = evenly(x2),
fac = evenly(fac)))
x2 = evenly(x2),
fac = evenly(fac),
data = su_eg4, envir = teardown_env()))
expect_s3_class(ds, "tbl_df")
expect_named(ds, c("x2", "fac", "x0"))
})
Expand Down Expand Up @@ -107,22 +110,25 @@ test_that("value_closest_to_median works with a factor", {

# typical_values()
test_that("typical_values works with a simple GAM", {
expect_silent(tv <- typical_values(su_m_quick_eg1))
expect_silent(tv <- typical_values(su_m_quick_eg1,
data = quick_eg1, envir = teardown_env()))
expect_s3_class(tv, "tbl_df")
expect_identical(nrow(tv), 1L)
expect_identical(ncol(tv), 4L)
})

test_that("typical_values works when including terms", {
expect_silent(tv <- typical_values(su_m_quick_eg1, vars = c(x0, x2)))
expect_silent(tv <- typical_values(su_m_quick_eg1, vars = c(x0, x2),
data = quick_eg1, envir = teardown_env()))
expect_s3_class(tv, "tbl_df")
expect_identical(nrow(tv), 1L)
expect_identical(ncol(tv), 2L)
expect_identical(names(tv), c("x0","x2"))
})

test_that("typical_values works when excluding terms", {
expect_silent(tv <- typical_values(su_m_quick_eg1, vars = !c(x0, x2)))
expect_silent(tv <- typical_values(su_m_quick_eg1, vars = !c(x0, x2),
data = quick_eg1, envir = teardown_env()))
expect_s3_class(tv, "tbl_df")
expect_identical(nrow(tv), 1L)
expect_identical(ncol(tv), 2L)
Expand Down Expand Up @@ -161,46 +167,52 @@ test_that("factor_combos works when there are no factor terms", {

# data_combos()
test_that("data_combos works with a GAM", {
expect_silent(dc <- data_combos(m_para_sm))
expect_silent(dc <- data_combos(m_para_sm,
envir = teardown_env(), data = df_2_fac))
expect_s3_class(dc, "tbl_df")
expect_identical(nrow(dc), 12L)
expect_identical(ncol(dc), 5L)
expect_named(dc, c("fac", "ff", "x0", "x1", "x2"))
})

test_that("data_combos works when including terms", {
expect_silent(dc <- data_combos(m_para_sm, vars = c(fac, x0)))
expect_silent(dc <- data_combos(m_para_sm, vars = c(fac, x0),
data = df_2_fac, envir = teardown_env()))
expect_s3_class(dc, "tbl_df")
expect_identical(nrow(dc), 12L)
expect_identical(ncol(dc), 2L)
expect_named(dc, c("fac", "x0"))
})

test_that("data_combos works when exluding terms", {
expect_silent(dc <- data_combos(m_para_sm, vars = !c(fac, x0)))
expect_silent(dc <- data_combos(m_para_sm, vars = !c(fac, x0),
data = df_2_fac, envir = teardown_env()))
expect_s3_class(dc, "tbl_df")
expect_identical(nrow(dc), 12L)
expect_identical(ncol(dc), 3L)
expect_named(dc, c("ff", "x1", "x2"))
})

test_that("data_combos works when there are no factor terms", {
expect_silent(dc <- data_combos(m_gam))
expect_silent(dc <- data_combos(m_gam,
data = su_eg1, envir = teardown_env()))
expect_identical(nrow(dc), 1L)
expect_identical(ncol(dc), 4L)
expect_named(dc, c("x0", "x1", "x2", "x3"))
})

# Test data_slice with models that have an offset(s) - # 189
test_that("data_slice with no args works with models with an offset", {
expect_silent(ds <- data_slice(m_1_smooth_offset))
expect_silent(ds <- data_slice(m_1_smooth_offset,
data = quick_eg1_off, envir = teardown_env()))
expect_identical(nrow(ds), 1L)
expect_identical(ncol(ds), 2L)
expect_identical(ds$off, 2)
})

test_that("data_slice with works with models with an offset", {
expect_silent(ds <- data_slice(m_1_smooth_offset, off = 1))
expect_silent(ds <- data_slice(m_1_smooth_offset, off = 1,
data = quick_eg1_off, envir = teardown_env()))
expect_identical(nrow(ds), 1L)
expect_identical(ncol(ds), 2L)
expect_identical(ds$off, 1)
Expand Down Expand Up @@ -229,3 +241,9 @@ test_that("data_slice works for a data frame", {
expect_snapshot(ds, cran = FALSE)
expect_identical(nrow(ds), 625L)
})

# make sure #222 remains fixed
test_that("issue 222 is fixed", {
expect_silent(tv <- typical_values(m_logical, envir = teardown_env(),
data = logi_df))
})
3 changes: 2 additions & 1 deletion tests/testthat/test-lp-matrix.R
Expand Up @@ -9,7 +9,8 @@ test_that("lp_matrix works for a GAM", {
expect_s3_class(xp, "lp_matrix")
expect_s3_class(xp, "matrix")

expect_silent(ds <- data_slice(m_gam, x2 = evenly(x2, n = 50)))
expect_silent(ds <- data_slice(m_gam, x2 = evenly(x2, n = 50),
data = su_eg1, envir = teardown_env()))
expect_silent(xp <- lp_matrix(m_gam, data = ds))
expect_s3_class(xp, "lp_matrix")
expect_s3_class(xp, "matrix")
Expand Down

0 comments on commit ec064d4

Please sign in to comment.