Skip to content

Commit

Permalink
initial sfpca implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
alexpghayes committed Mar 19, 2018
1 parent 58c6e57 commit accd2e6
Show file tree
Hide file tree
Showing 23 changed files with 1,274 additions and 32 deletions.
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
^.*\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
^README\.Rmd$
32 changes: 1 addition & 31 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,33 +1,3 @@
# History files
.Rproj.user
.Rhistory
.Rapp.history

# Session Data files
.RData

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
/*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md
22 changes: 22 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Package: moma
Title: Application for GSOC MoMA Project
Version: 0.0.0.9000
Authors@R: person("Alex", "Hayes", email = "alexpghayes@gmail.com", role = c("aut", "cre"))
Description: Computes the L1-penalized rank 1 sparse and functional matrix
factorization described in Sparse and Functional Principal Components
Analysis (Allen, 2013). Package is Alex's Hayes application to the
Google Summer of Code program.
Depends: R (>= 3.4.1)
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Suggests:
testthat
RoxygenNote: 6.0.1
Imports:
irlba,
Rcpp,
RcppArmadillo
LinkingTo:
Rcpp,
RcppArmadillo
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2018
COPYRIGHT HOLDER: Alex Hayes
21 changes: 21 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# MIT License

Copyright (c) 2018 Alex Hayes

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(sfpca)
export(sfpca_r)
importFrom(Rcpp,sourceCpp)
useDynLib(moma, .registration = TRUE)
7 changes: 7 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

sfpca_arma <- function(X, lambda_u, lambda_v, alpha_u, alpha_v, Omega_u, Omega_v) {
.Call(`_moma_sfpca_arma`, X, lambda_u, lambda_v, alpha_u, alpha_v, Omega_u, Omega_v)
}

125 changes: 125 additions & 0 deletions R/sfpca.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#' @useDynLib moma, .registration = TRUE
#' @importFrom Rcpp sourceCpp
NULL

zeros_like <- function(x) rep(0, length(x)) # vectors only
op_norm <- function(x, A) sqrt(t(x) %*% A %*% x) # operator norm
l2_norm <- function(x) sqrt(sum(x^2))
prox_l1 <- function(y, lambda) sign(y) * pmax(abs(y) - lambda, 0)

max_eigenval <- function(X) {
# add 0.01 to agree with MATLAB implementation
max(eigen(X, symmetric = FALSE, only.values = TRUE)$values) + 0.01
}

#' Compute rank 1 sparse and functional principal components
#'
#' @param X A data matrix. Considered n x p by convention.
#' @param lambda_u Sparsity parameter for left singular vectors.
#' @param lambda_v Sparsity parameter for right singular vectors.
#' @param alpha_u Smoothness parameter for left singular vectors.
#' @param alpha_v Smoothness parameter for right singular vectors.
#' @param Omega_u Roughness penalty matrix (n x n) for left singular vectors.
#' Must be positive semi-definite.
#' @param Omega_v Roughness penalty matrix (p x p) for right singular vectors.
#' Must be positive semi-definite.
#'
#' @return List with three named elements: left singular vector u, eigenval d,
#' and right singular vector v.
#' @export
sfpca <- function(X, lambda_u = 0, lambda_v = 0, alpha_u = 0, alpha_v = 0,
Omega_u = diag(nrow(X)), Omega_v = diag(ncol(X))) {

# R wrapper around C++ implementation for lazily argument evaluation and
# convenient argument checking

stopifnot(lambda_u >= 0)
stopifnot(lambda_v >= 0)
stopifnot(alpha_u >= 0)
stopifnot(alpha_v >= 0)
stopifnot(all(svd(Omega_u)$d >= 0)) # check if positive semi-definite
stopifnot(all(svd(Omega_v)$d >= 0))

sfpca_arma(X, lambda_u, lambda_v, alpha_u, alpha_v, Omega_u, Omega_v)
}

#' Compute rank 1 sparse and functional principal components
#'
#' @param X A data matrix. Considered n x p by convention.
#' @param lambda_u Sparsity parameter for left singular vectors.
#' @param lambda_v Sparsity parameter for right singular vectors.
#' @param alpha_u Smoothness parameter for left singular vectors.
#' @param alpha_v Smoothness parameter for right singular vectors.
#' @param Omega_u Roughness penalty matrix (n x n) for left singular vectors.
#' Must be positive semi-definite.
#' @param Omega_v Roughness penalty matrix (p x p) for right singular vectors.
#' Must be positive semi-definite.
#'
#' @return List with three named elements: left singular vector u, eigenval d,
#' and right singular vector v.
#' @export
sfpca_r <- function(X, lambda_u = 0, lambda_v = 0, alpha_u = 0, alpha_v = 0,
Omega_u = diag(nrow(X)), Omega_v = diag(ncol(X))) {

stopifnot(lambda_u >= 0)
stopifnot(lambda_v >= 0)
stopifnot(alpha_u >= 0)
stopifnot(alpha_v >= 0)
stopifnot(all(svd(Omega_u)$d >= 0)) # check if positive semi-definite
stopifnot(all(svd(Omega_v)$d >= 0))

# some conventions so the following code matches the paper:
# - u: left singular vectors, or related to
# - v: right singular vectors, or related to
# - d: eigenvalues
# - vectors are lowercase (u, v), matrices uppercase (X, Omega_u)
# - underscores indicate subscripts, LaTeX style
# - X is an n x p matrix

n <- nrow(X)
p <- ncol(X)

# initialize u, v to rank one SVD solution

tsvd <- irlba::irlba(X, 1, 1)
u <- tsvd$u # n x 1 matrix
v <- tsvd$v # p x 1 matrix

# multiplication by n and p here is to agree with the MATLAB implementation
S_u <- diag(n) + alpha_u * Omega_u * n
S_v <- diag(p) + alpha_v * Omega_v * p

L_u <- max_eigenval(S_u)
L_v <- max_eigenval(S_v)

tol <- 1e-6
delta_u <- tol + 1
delta_v <- tol + 1

while (delta_u > tol & delta_v > tol) {

while (delta_u > tol) {
old_u <- u
u <- prox_l1(u + (X %*% v - S_u %*% u) / L_u, lambda_u / L_u)
u_norm <- as.numeric(op_norm(u, S_u))
u <- if (u_norm > 0) u / u_norm else zeros_like(u)
delta_u <- l2_norm(u - old_u)
}

# same as u case except place with v, and add a single transposition
while (delta_v > tol) {
old_v <- v
v <- prox_l1(v + (t(X) %*% u - S_v %*% v) / L_v, lambda_v / L_v)
v_norm <- as.numeric(op_norm(v, S_v))
v <- if (v_norm > 0) v / v_norm else zeros_like(v)
delta_v <- l2_norm(v - old_v)
}
}

# normalize eigenvectors and calculate eigenvalue
u <- if (l2_norm(u) > 0) u / l2_norm(u) else u
v <- if (l2_norm(v) > 0) v / l2_norm(v) else v
d <- as.numeric(t(u) %*% X %*% v)

list(u = u, d = d, v = v)
}
45 changes: 45 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
output: github_document
---

<!-- README.md is generated from README.Rmd. Please edit that file -->

```{r setup, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
out.width = "100%"
)
```

# GSOC Modern Multivariate Analysis

**This application is in progress and is not yet complete**

So far I have:
- Implemented L1 penalized rank 1 SFPCA algorithm in R
- Implemented L1 penalized rank 1 SFPCA algorithm in C++ using Armadillo

Next steps:
- Legitimate tests based on the provided MATLAB code
- Make sure to pass R CMD check
- Some general code cleanup

At this point both the R and C++ code runs and I get the same values when on the `iris` dataset:

```{r}
library(moma)
X <- as.matrix(iris[, 1:4])
sfpca(X, 10, 4)
sfpca_r(X, 10, 4)
```

The results match up with a standard rank 1 SVD when no penalties are included.

```{r}
irlba::irlba(X, 1, 1)
sfpca(X)
```


Loading

0 comments on commit accd2e6

Please sign in to comment.