-
Notifications
You must be signed in to change notification settings - Fork 3
/
log_contrast.R
87 lines (79 loc) · 2.74 KB
/
log_contrast.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#' Perform sparse log-contrast regression
#'
#' Solves the constrained lasso problem using the CLASSO module
#' in Python. The optimization problem is
#'
#' minimize_{beta, beta0} 1/(2n) || y - beta0 1_n - Zagg_clr beta ||^2
#' + lamda_max * frac || beta ||_1
#' subject to C beta = 0
#'
#' Default is C = 1_p^T, but C can be a general matrix.
#'
#' Observe that the tuning parameter is specified through "frac", the fraction
#' of lamda_max (which is the smallest value for which beta is nonzero).
#'
#' @param Z n by p matrix containing log(X)
#' @param y n vector (response)
#' @param C m by p matrix. Default is a row vector of ones.
#' @param fraclist (optional) vector of tuning parameter multipliers. Should be in (0, 1].
#' @param nlam number of tuning parameters (ignored if fraclist non-NULL)
#' @param min_frac smallest value of tuning parameter multiplier (ignored if
#' fraclist non-NULL)
#'
#' @export
sparse_log_contrast <- function(Z, y, C = NULL, fraclist = NULL, nlam = 20, min_frac = 1e-4) {
n <- length(y)
stopifnot(nrow(Z) == n)
p <- ncol(Z)
if (is.null(C)) C <- matrix(1, nrow = 1, ncol = p)
# CLASSO can solve a problem of the form
#
# (based on https://github.com/Leo-Simpson/c-lasso)
# [R1] Standard constrained Lasso regression:
# minimize_{beta} || y - X beta ||^2 + lam ||beta||_1
# subject to C beta = 0
#
# Define
# yt = y - ybar 1_n
# M = Z_clr - 1_n v^T, where v = colMeans(Z_clr)
#
if(is.null(fraclist))
fraclist <- exp(seq(0, log(min_frac), length = nlam))
Zbar <- Matrix::rowMeans(Z)
Z_clr <- Z - Zbar
ybar <- mean(y)
yt <- y - ybar
v <- Matrix::colMeans(Z_clr)
M <- Matrix::t(Matrix::t(Z_clr) - v)
fit <- list()
X_classo <- as.matrix(M)
# set up CLASSO problem:
prob <- classo$classo_problem(X = X_classo,
C = C,
y = array(yt))
prob$formulation$classification <- FALSE
prob$formulation$concomitant <- FALSE
prob$formulation$huber <- FALSE
prob$model_selection$PATH <- TRUE
prob$model_selection$CV <- FALSE
prob$model_selection$LAMfixed <- FALSE
prob$model_selection$StabSel <- FALSE
prob$model_selection$PATHparameters$lambdas <- fraclist
# solve it
prob$solve()
# extract outputs
beta <- t(prob$solution$PATH$BETAS)
# purrr::map(prob$solution$PATH$BETAS, as.numeric) %>%
# rlang::set_names(paste0("V", 1:length(prob$solution$PATH$BETAS))) %>%
# dplyr::bind_cols() %>%
# as.matrix()
lambda_classo <- prob$model_selection$PATHparameters$lambdas
beta0 <- ybar - crossprod(beta, v)
rownames(beta) <- colnames(Z)
list(beta0 = beta0,
beta = beta,
C = C,
fraclist = lambda_classo, # / (2 * n),
fit_classo = prob,
refit = FALSE)
}