/
partialize.R
179 lines (152 loc) · 7.35 KB
/
partialize.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# Avoid duplicating code for the workhorse parts of the S3 method
do_partials <- function(model, vars = NULL, data = NULL, at = NULL,
center = TRUE, scale = c("response", "link"),
set.offset = 1, formula = NULL, ...) {
# Get the original data if new data are not provided
if (is.null(data)) {
data <- get_data(model)
}
# Drop missing observations
data <- drop_missing(model, data)
if (is.null(formula)) {formula <- get_formula(model, ...)}
# Make sure the vars are in the data
if (any(vars %nin% names(data))) {
stop_wrap(paste(vars %not% names(data), collapse = " and "),
" not found in the data.")
}
# Make sure the vars are in the model
if (any(vars %nin% original_terms(formula))) {
the_terms <- original_terms(formula)
stop_wrap(paste(vars %not% the_terms, collapse = " and "),
" not found in the model.")
}
# Get fixed values of non-focal variables
controls <- get_control_values(model = model, data = data, preds = vars,
at = at, center = center,
set.offset = set.offset, ...)
# Assign those values to the original data
for (n in names(controls)) {
data[[n]] <- controls[[n]]
}
resp <- as.character(deparse(get_lhs(formula)))
predicted <- make_predictions(
model = model, pred = NULL, new_data = data, interval = FALSE,
outcome.scale = scale, return.orig.data = FALSE, set.offset = set.offset,
data = data, ...
)
# Add a column to data with the predictions (replace original if it's there)
data[[resp]] <- predicted[[resp]]
return(data)
}
#' @rdname partialize
#' @export
partialize <- function(model, ...) {
UseMethod("partialize")
}
#' @title Adjust observed data for partial residuals plots
#' @description This function is designed to facilitate the creation of partial
#' residual plots, in which you can plot observed data alongside model
#' predictions. The difference is instead of the *actual* observed data, the
#' outcome variable is adjusted for the effects of the covariates.
#' @param model A regression model.
#' @param vars The variable(s) to *not* adjust for, as a string (or vector of
#' strings). If I want to show the effect of `x` adjusting for the effect of
#' `z`, then I would make `"x"` the `vars` argument.
#' @param data Optionally, provide the data used to fit the model (or some
#' other data frame with the same variables). Otherwise, it will be retrieved
#' from the model or the global environment.
#' @param scale For GLMs, should the outcome variable be returned on the link
#' scale or response scale? Default is `"response"`.
#' @inheritParams make_predictions
#' @return `data` plus the residualized outcome variable.
#' @details
#' The main use for working with partial residuals rather than the observed
#' values is to explore patterns in the model fit with respect to one or more
#' variables while "controlling out" the effects of others. Plotting a
#' predicted line along with observed data may make a very well-fitting model
#' look as if it is a poor fit if a lot of variation is accounted for by
#' variables other than the one on the x-axis.
#'
#' I advise consulting Fox and Weisberg (available free) for more details
#' on what partial residuals are. This function is designed to produce
#' data in a similar format to `effects::Effect()` when that function has
#' `residuals` set to `TRUE` and is plotted. I wanted a more modular function
#' to produce the data separately. To be clear, the developers of the `effects`
#' package have nothing to do with this function; `partialize`` is merely
#' designed to replicate some of that functionality.
#' @references
#' Fox, J., & Weisberg, S. (2018). Visualizing fit and lack of fit in complex
#' regression models with predictor effect plots and partial residuals.
#' *Journal of Statistical Software*, *87*(9), 1–27.
#' https://doi.org/10.18637/jss.v087.i09
#' @rdname partialize
#' @export
partialize.default <- function(model, vars = NULL, data = NULL, at = NULL,
center = TRUE, scale = c("response", "link"),
set.offset = 1, ...) {
predicted <- do_partials(model, vars = vars, data = data, at = at,
center = center, scale = "link",
set.offset = set.offset, ...)
resp <- get_response_name(model)
# Add the residuals to the predictions
resids <- residuals(model, type = "working")
predicted[[resp]] <- predicted[[resp]] + resids[!is.na(resids)]
# If we want it on the response scale, we need to transform back to it
if (scale[1] == "response") {
predicted[[resp]] <- family(model)$linkinv(predicted[[resp]])
}
return(tibble::as_tibble(predicted))
}
#' @export
partialize.brmsfit <- function(model, vars = NULL, data = NULL, at = NULL,
center = TRUE, scale = c("response", "link"),
set.offset = 1, formula = NULL, resp = NULL,
dpar = NULL, ...) {
predicted <- do_partials(model, vars = vars, data = data, at = at,
center = center, scale = "response",
set.offset = set.offset, formula = formula,
resp = resp, dpar = dpar, ...)
resp <- get_response_name(model, resp = resp, dpar = dpar)
# Add the residuals to the predictions
resids <- residuals(model, type = "ordinary", resp = resp)[,"Estimate"]
predicted[[resp]] <- predicted[[resp]] + resids[!is.na(resids)]
# If we want it on the link scale, we need to transform back to it
# brms doesn't do predictions on the link scale, so this is the opposite
# of the default behavior.
if (scale[1] == "link") {
predicted[[resp]] <- family(model)$linkfun(predicted[[resp]])
}
return(tibble::as_tibble(predicted))
}
#' @export
partialize.stanreg <- function(model, vars = NULL, data = NULL, at = NULL,
center = TRUE, scale = c("response", "link"),
set.offset = 1, ...) {
predicted <- do_partials(model, vars = vars, data = data, at = at,
center = center, scale = "response",
set.offset = set.offset, ...)
resp <- get_response_name(model)
# Add the residuals to the predictions
resids <- residuals(model)
predicted[[resp]] <- predicted[[resp]] + resids[!is.na(resids)]
# If we want it on the link scale, we need to transform back to it
# brms doesn't do predictions on the link scale, so this is the opposite
# of the default behavior.
if (scale[1] == "link") {
predicted[[resp]] <- family(model)$linkfun(predicted[[resp]])
}
return(tibble::as_tibble(predicted))
}
#' @export
partialize.rq <- function(model, vars = NULL, data = NULL, at = NULL,
center = TRUE, scale = c("response", "link"),
set.offset = 1, ...) {
predicted <- do_partials(model, vars = vars, data = data, at = at,
center = center, scale = "link",
set.offset = set.offset, ...)
resp <- get_response_name(model)
# Add the residuals to the predictions
resids <- residuals(model, type = "working")
predicted[[resp]] <- predicted[[resp]] + resids[!is.na(resids)]
return(tibble::as_tibble(predicted))
}