/
select_pcs.R
80 lines (68 loc) · 2.61 KB
/
select_pcs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#' Select principal components
#'
#' \code{select_pcs} returns a list of principal components as chosen by one of three methods (from D.A. Jackson (1993). Ecology, 74, 2204-2214).
#'
#' @param pca_output Output from \code{gm.prcomp} or \code{prcomp}
#' @param method Method used to select PCs. Defaults to the most conservative "broken_stick". Other methods are "kaiser_guttman" (PCs with eigenvalues greater than the mean eigenvalue) and "total_variance" (PCs explaining at least \code{total_var} variance)
#' @param total_var Total variance to choose for "total_variance" method. Defaults to .95
#' @return Returns selected PCs and variance explained
#'
#' @note Code for "broken_stick" method adapted from function \href{http://adn.biol.umontreal.ca/~numericalecology/numecolR/}{"evplot" by Francois Gillet}.
#'
#' @export
select_pcs <- function(pca_output, method = c("broken_stick", "kaiser_guttman", "total_variance"), total_var = .95) {
# check that "prcomp" is in one class name
# and that the PCA_output has sdev
if (!(grepl("prcomp", class(pca_output)) %>% any()) ||
!"sdev" %in% names(pca_output)) {
stop("pca_output must be a prcomp object")
}
method <- match.arg(method)
ev <- pca_output$sdev^2
var <- ev / sum(ev)
n <- length(ev)
if (method == "broken_stick" | method == "bs") {
bsm <- data.frame(j = seq(1:n), p = 0)
bsm$p[1] <- 1/n
for (i in 2:n) bsm$p[i] <- bsm$p[i - 1] + (1 / (n + 1 - i))
bsm$p <- 100 * bsm$p / n
test <- cbind(100 * ev / sum(ev), bsm$p[n:1])
n_PCs <- sum(test[, 1] >= test[, 2])
} else if (method == "kaiser_guttman" | method == "kg") {
# return PCs with eigenvalues greater than the mean eigenvalue
n_PCs <- sum(ev >= mean(ev));
} else if (method == "total_variance" | method == "tv") {
# return PCs explaining at least total.var variance
cumvar <- cumsum(var);
n_PCs <- sum(cumvar < total_var) + 1;
}
# Print PCs and variance explained
selected <- tibble::tibble(
"SD" = pca_output$sdev,
"Variance" = var,
"Cum Var" = cumsum(var),
"PC" = paste0("PC", seq(1, n, 1))
) %>%
dplyr::slice(1:n_PCs) %>%
dplyr::mutate(dplyr::across("SD":"Cum Var", ~round(.x, 3))) %>%
tibble::column_to_rownames(var = "PC")
to_return <- list(
selected = selected,
n = n_PCs,
method = method
)
class(to_return) <- "selected_PCs"
return(to_return)
}
#' Print for selected_PCs
#'
#' @param x a list of class selected_PCs
#' @param ... arguments passed to or from other methods
#'
#' @return prints the selected PCs
#' @keywords internal
#'
print.selected_PCs <- function(x, ...) {
print(x$selected)
invisible(x)
}