/
variance_inflation.R
86 lines (77 loc) 路 1.98 KB
/
variance_inflation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#' Variance inflation factor
#'
#' @param mod A model object (or data.frame). For the moment only `lm` and a
#' plain dataset is implemented
#' @param ... not used
#' @rdname vif
#' @return a matrix with for each variable de variance inflation factor,
#' the degrees of freedom en the rescaled variance inflation factor based on
#' the degrees of freedom.
#' @export
#'
#' @family Statistics
vif <- function(mod, ...) {
UseMethod("vif")
}
#---------------------------
#' Variance inflation factor (`lm` model)
#'
#' @export
#' @method vif lm
#' @rdname vif
vif.lm <- function(mod, ...) {
vif.default(mod, ...)
}
#' Variance inflation factor (data.frame)
#'
#' @export
#' @method vif data.frame
#' @rdname vif
vif.data.frame <- function(mod, ...) {
mod <- cbind(dummyResponse = 1, mod)
my_formula <- as.formula(mod)
mod <- lm(my_formula, data = mod)
vif.default(mod, ...)
}
#' Variance inflation factor (default routine)
#'
#' @export
#' @method vif default
#' @rdname vif
#' @examples{
#' vif(airquality)
#' }
#' @importFrom stats as.formula coef coefficients cov2cor lm model.matrix vcov
vif.default <- function(mod, ...) {
if (any(is.na(coef(mod)))) {
stop("there are aliased coefficients in the model")
}
v <- vcov(mod)
assign <- attr(model.matrix(mod), "assign")
if (names(coefficients(mod)[1]) == "(Intercept)") {
v <- v[-1, -1]
assign <- assign[-1]
} else {
warning("No intercept: vifs may not be sensible.")
}
terms <- labels(terms(mod))
n_terms <- length(terms)
if (n_terms < 2) {
stop("model contains fewer than 2 terms")
}
r <- cov2cor(v)
det_r <- det(r)
result <- matrix(0, n_terms, 3)
rownames(result) <- terms
colnames(result) <- c("GVIF", "Df", "GVIF^(1/(2*Df))")
for (term in 1:n_terms) {
subs <- which(assign == term)
result[term, 1] <- det(as.matrix(r[subs, subs])) * det(as.matrix(r[
-subs,
-subs
])) / det_r
result[term, 2] <- length(subs)
}
result[, 3] <- result[, 1]^(1 / (2 * result[, 2]))
result
}