-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
58c6e57
commit accd2e6
Showing
23 changed files
with
1,274 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ | ||
^LICENSE\.md$ | ||
^README\.Rmd$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,3 @@ | ||
# History files | ||
.Rproj.user | ||
.Rhistory | ||
.Rapp.history | ||
|
||
# Session Data files | ||
.RData | ||
|
||
# Example code in package build process | ||
*-Ex.R | ||
|
||
# Output files from R CMD build | ||
/*.tar.gz | ||
|
||
# Output files from R CMD check | ||
/*.Rcheck/ | ||
|
||
# RStudio files | ||
.Rproj.user/ | ||
|
||
# produced vignettes | ||
vignettes/*.html | ||
vignettes/*.pdf | ||
|
||
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 | ||
.httr-oauth | ||
|
||
# knitr and R markdown default cache directories | ||
/*_cache/ | ||
/cache/ | ||
|
||
# Temporary files created by R markdown | ||
*.utf8.md | ||
*.knit.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Package: moma | ||
Title: Application for GSOC MoMA Project | ||
Version: 0.0.0.9000 | ||
Authors@R: person("Alex", "Hayes", email = "alexpghayes@gmail.com", role = c("aut", "cre")) | ||
Description: Computes the L1-penalized rank 1 sparse and functional matrix | ||
factorization described in Sparse and Functional Principal Components | ||
Analysis (Allen, 2013). Package is Alex's Hayes application to the | ||
Google Summer of Code program. | ||
Depends: R (>= 3.4.1) | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
LazyData: true | ||
Suggests: | ||
testthat | ||
RoxygenNote: 6.0.1 | ||
Imports: | ||
irlba, | ||
Rcpp, | ||
RcppArmadillo | ||
LinkingTo: | ||
Rcpp, | ||
RcppArmadillo |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2018 | ||
COPYRIGHT HOLDER: Alex Hayes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# MIT License | ||
|
||
Copyright (c) 2018 Alex Hayes | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(sfpca) | ||
export(sfpca_r) | ||
importFrom(Rcpp,sourceCpp) | ||
useDynLib(moma, .registration = TRUE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Generated by using Rcpp::compileAttributes() -> do not edit by hand | ||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||
|
||
sfpca_arma <- function(X, lambda_u, lambda_v, alpha_u, alpha_v, Omega_u, Omega_v) { | ||
.Call(`_moma_sfpca_arma`, X, lambda_u, lambda_v, alpha_u, alpha_v, Omega_u, Omega_v) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
#' @useDynLib moma, .registration = TRUE | ||
#' @importFrom Rcpp sourceCpp | ||
NULL | ||
|
||
zeros_like <- function(x) rep(0, length(x)) # vectors only | ||
op_norm <- function(x, A) sqrt(t(x) %*% A %*% x) # operator norm | ||
l2_norm <- function(x) sqrt(sum(x^2)) | ||
prox_l1 <- function(y, lambda) sign(y) * pmax(abs(y) - lambda, 0) | ||
|
||
max_eigenval <- function(X) { | ||
# add 0.01 to agree with MATLAB implementation | ||
max(eigen(X, symmetric = FALSE, only.values = TRUE)$values) + 0.01 | ||
} | ||
|
||
#' Compute rank 1 sparse and functional principal components | ||
#' | ||
#' @param X A data matrix. Considered n x p by convention. | ||
#' @param lambda_u Sparsity parameter for left singular vectors. | ||
#' @param lambda_v Sparsity parameter for right singular vectors. | ||
#' @param alpha_u Smoothness parameter for left singular vectors. | ||
#' @param alpha_v Smoothness parameter for right singular vectors. | ||
#' @param Omega_u Roughness penalty matrix (n x n) for left singular vectors. | ||
#' Must be positive semi-definite. | ||
#' @param Omega_v Roughness penalty matrix (p x p) for right singular vectors. | ||
#' Must be positive semi-definite. | ||
#' | ||
#' @return List with three named elements: left singular vector u, eigenval d, | ||
#' and right singular vector v. | ||
#' @export | ||
sfpca <- function(X, lambda_u = 0, lambda_v = 0, alpha_u = 0, alpha_v = 0, | ||
Omega_u = diag(nrow(X)), Omega_v = diag(ncol(X))) { | ||
|
||
# R wrapper around C++ implementation for lazily argument evaluation and | ||
# convenient argument checking | ||
|
||
stopifnot(lambda_u >= 0) | ||
stopifnot(lambda_v >= 0) | ||
stopifnot(alpha_u >= 0) | ||
stopifnot(alpha_v >= 0) | ||
stopifnot(all(svd(Omega_u)$d >= 0)) # check if positive semi-definite | ||
stopifnot(all(svd(Omega_v)$d >= 0)) | ||
|
||
sfpca_arma(X, lambda_u, lambda_v, alpha_u, alpha_v, Omega_u, Omega_v) | ||
} | ||
|
||
#' Compute rank 1 sparse and functional principal components | ||
#' | ||
#' @param X A data matrix. Considered n x p by convention. | ||
#' @param lambda_u Sparsity parameter for left singular vectors. | ||
#' @param lambda_v Sparsity parameter for right singular vectors. | ||
#' @param alpha_u Smoothness parameter for left singular vectors. | ||
#' @param alpha_v Smoothness parameter for right singular vectors. | ||
#' @param Omega_u Roughness penalty matrix (n x n) for left singular vectors. | ||
#' Must be positive semi-definite. | ||
#' @param Omega_v Roughness penalty matrix (p x p) for right singular vectors. | ||
#' Must be positive semi-definite. | ||
#' | ||
#' @return List with three named elements: left singular vector u, eigenval d, | ||
#' and right singular vector v. | ||
#' @export | ||
sfpca_r <- function(X, lambda_u = 0, lambda_v = 0, alpha_u = 0, alpha_v = 0, | ||
Omega_u = diag(nrow(X)), Omega_v = diag(ncol(X))) { | ||
|
||
stopifnot(lambda_u >= 0) | ||
stopifnot(lambda_v >= 0) | ||
stopifnot(alpha_u >= 0) | ||
stopifnot(alpha_v >= 0) | ||
stopifnot(all(svd(Omega_u)$d >= 0)) # check if positive semi-definite | ||
stopifnot(all(svd(Omega_v)$d >= 0)) | ||
|
||
# some conventions so the following code matches the paper: | ||
# - u: left singular vectors, or related to | ||
# - v: right singular vectors, or related to | ||
# - d: eigenvalues | ||
# - vectors are lowercase (u, v), matrices uppercase (X, Omega_u) | ||
# - underscores indicate subscripts, LaTeX style | ||
# - X is an n x p matrix | ||
|
||
n <- nrow(X) | ||
p <- ncol(X) | ||
|
||
# initialize u, v to rank one SVD solution | ||
|
||
tsvd <- irlba::irlba(X, 1, 1) | ||
u <- tsvd$u # n x 1 matrix | ||
v <- tsvd$v # p x 1 matrix | ||
|
||
# multiplication by n and p here is to agree with the MATLAB implementation | ||
S_u <- diag(n) + alpha_u * Omega_u * n | ||
S_v <- diag(p) + alpha_v * Omega_v * p | ||
|
||
L_u <- max_eigenval(S_u) | ||
L_v <- max_eigenval(S_v) | ||
|
||
tol <- 1e-6 | ||
delta_u <- tol + 1 | ||
delta_v <- tol + 1 | ||
|
||
while (delta_u > tol & delta_v > tol) { | ||
|
||
while (delta_u > tol) { | ||
old_u <- u | ||
u <- prox_l1(u + (X %*% v - S_u %*% u) / L_u, lambda_u / L_u) | ||
u_norm <- as.numeric(op_norm(u, S_u)) | ||
u <- if (u_norm > 0) u / u_norm else zeros_like(u) | ||
delta_u <- l2_norm(u - old_u) | ||
} | ||
|
||
# same as u case except place with v, and add a single transposition | ||
while (delta_v > tol) { | ||
old_v <- v | ||
v <- prox_l1(v + (t(X) %*% u - S_v %*% v) / L_v, lambda_v / L_v) | ||
v_norm <- as.numeric(op_norm(v, S_v)) | ||
v <- if (v_norm > 0) v / v_norm else zeros_like(v) | ||
delta_v <- l2_norm(v - old_v) | ||
} | ||
} | ||
|
||
# normalize eigenvectors and calculate eigenvalue | ||
u <- if (l2_norm(u) > 0) u / l2_norm(u) else u | ||
v <- if (l2_norm(v) > 0) v / l2_norm(v) else v | ||
d <- as.numeric(t(u) %*% X %*% v) | ||
|
||
list(u = u, d = d, v = v) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
--- | ||
output: github_document | ||
--- | ||
|
||
<!-- README.md is generated from README.Rmd. Please edit that file --> | ||
|
||
```{r setup, include = FALSE} | ||
knitr::opts_chunk$set( | ||
collapse = TRUE, | ||
comment = "#>", | ||
out.width = "100%" | ||
) | ||
``` | ||
|
||
# GSOC Modern Multivariate Analysis | ||
|
||
**This application is in progress and is not yet complete** | ||
|
||
So far I have: | ||
- Implemented L1 penalized rank 1 SFPCA algorithm in R | ||
- Implemented L1 penalized rank 1 SFPCA algorithm in C++ using Armadillo | ||
|
||
Next steps: | ||
- Legitimate tests based on the provided MATLAB code | ||
- Make sure to pass R CMD check | ||
- Some general code cleanup | ||
|
||
At this point both the R and C++ code runs and I get the same values when on the `iris` dataset: | ||
|
||
```{r} | ||
library(moma) | ||
X <- as.matrix(iris[, 1:4]) | ||
sfpca(X, 10, 4) | ||
sfpca_r(X, 10, 4) | ||
``` | ||
|
||
The results match up with a standard rank 1 SVD when no penalties are included. | ||
|
||
```{r} | ||
irlba::irlba(X, 1, 1) | ||
sfpca(X) | ||
``` | ||
|
||
|
Oops, something went wrong.