-
Notifications
You must be signed in to change notification settings - Fork 10
/
step_spca.R
138 lines (122 loc) · 4.5 KB
/
step_spca.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#' Sparse Principal Components Analysis Variable Reduction
#'
#' Creates a \emph{specification} of a recipe step that will derive sparse
#' principal components from one or more numeric variables.
#'
#' @inheritParams step_lincomp
#' @param sparsity,num_var sparsity (L1 norm) penalty for each component or
#' number of variables with non-zero component loadings. Larger sparsity
#' values produce more zero loadings. Argument \code{sparsity} is ignored if
#' \code{num_var} is given. The argument value may be a single number
#' applied to all components or a vector of component-specific numbers.
#' @param shrinkage numeric shrinkage (quadratic) penalty for the components to
#' improve conditioning; larger values produce more shrinkage of component
#' loadings toward zero.
#' @param center,scale logicals indicating whether to mean center and standard
#' deviation scale the original variables prior to deriving components, or
#' functions or names of functions for the centering and scaling.
#' @param max_iter maximum number of algorithm iterations allowed.
#' @param tol numeric tolerance for the convergence criterion.
#' @param x \code{step_spca} object.
#'
#' @return Function \code{step_spca} creates a new step whose class is of
#' the same name and inherits from \code{\link{step_lincomp}}, adds it to the
#' sequence of existing steps (if any) in the recipe, and returns the updated
#' recipe. For the \code{tidy} method, a tibble with columns \code{terms}
#' (selectors or variables selected), \code{weight} of each variable loading in
#' the components, and \code{name} of the new variable names; and with
#' attribute \code{pev} containing the proportions of explained variation.
#'
#' @details
#' Sparse principal components analysis (SPCA) is a variant of PCA in which
#' the original variables may have zero loadings in the linear combinations
#' that form the components.
#'
#' @references
#' Zou, H., Hastie, T., & Tibshirani, R. (2006). Sparse principal component
#' analysis. \emph{Journal of Computational and Graphical Statistics},
#' \emph{15}(2), 265-286.
#'
#' @seealso \code{\link[elasticnet]{spca}}, \code{\link[recipes]{recipe}},
#' \code{\link[recipes]{prep}}, \code{\link[recipes]{bake}}
#'
#' @examples
#' library(recipes)
#'
#' rec <- recipe(rating ~ ., data = attitude)
#' spca_rec <- rec %>%
#' step_spca(all_predictors(), num_comp = 5, sparsity = 1)
#' spca_prep <- prep(spca_rec, training = attitude)
#' spca_data <- bake(spca_prep, attitude)
#'
#' pairs(spca_data, lower.panel = NULL)
#'
#' tidy(spca_rec, number = 1)
#' tidy(spca_prep, number = 1)
#'
step_spca <- function(
recipe, ..., num_comp = 5, sparsity = 0, num_var = integer(),
shrinkage = 1e-6, center = TRUE, scale = TRUE, max_iter = 200, tol = 1e-3,
replace = TRUE, prefix = "SPCA", role = "predictor", skip = FALSE,
id = recipes::rand_id("spca")
) {
recipes::add_step(recipe, new_step_spca(
terms = recipes::ellipse_check(...),
num_comp = num_comp,
sparsity = sparsity,
num_var = num_var,
shrinkage = shrinkage,
center = center,
scale = scale,
max_iter = max_iter,
tol = tol,
replace = replace,
prefix = prefix,
role = role,
skip = skip,
id = id
))
}
new_step_spca <- function(..., sparsity, num_var, shrinkage, max_iter,
tol) {
throw(check_packages("elasticnet"))
transform <- function(x, step) {
throw(check_packages("elasticnet"))
num_comp <- min(step$num_comp, nrow(x))
if (is_empty(step$num_var)) {
para <- step$sparsity
sparse <- "penalty"
} else {
para <- step$num_var
sparse <- "varnum"
}
res <- elasticnet::spca(x, K = num_comp,
para = rep_len(para, num_comp),
sparse = sparse, lambda = step$shrinkage,
max.iter = step$max_iter, eps.conv = step$tol)
list(weights = res$loadings, pev = res$pev)
}
options <- list(
sparsity = sparsity,
num_var = num_var,
shrinkage = shrinkage,
max_iter = max_iter,
tol = tol
)
object <- new_step_lincomp(..., transform = transform, options = options)
structure(object, class = c("step_spca", class(object)))
}
#' @rdname step_spca
#'
tunable.step_spca <- function(x, ...) {
tibble(
name = c("num_comp", "sparsity"),
call_info = list(
list(pkg = "dials", fun = "num_comp", range = c(1, 4)),
list(pkg = "dials", fun = "penalty", range = c(-10, 0))
),
source = "MachineShop",
component = "step_spca",
component_id = x$id
)
}