-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8204b09
Showing
52 changed files
with
4,405 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
Release 0.3-0 (3/17/2016): | ||
* Rename package to 'coop'. | ||
* 'Namespaced' the internal lib. | ||
* Improved vignettes, package description. | ||
* Use https in inst/CITATION. | ||
* Release to CRAN. | ||
|
||
Release 0.2-2 (3/11/2016): | ||
* Split vignette into 2. | ||
* Improve R help. | ||
|
||
Release 0.2-1 (3/9/2016): | ||
* Better memory allocation checking. | ||
* Move sparsity functions from wrappers.c to sparse_utils.c. | ||
|
||
Release 0.2-0 (3/6/2016): | ||
* Add covariance and pearson correlation. | ||
* Rename package from fastcosim to fastco. | ||
* Major re-organization of internals. | ||
* Better tests. | ||
|
||
Release 0.1-0 (12/9/2015): | ||
* Add cosine() function. | ||
* Add dense matrix, dense vector-vector, and sparse matrix methods. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
Package: coop | ||
Type: Package | ||
Title: Co-Operation: Fast Covariance, Correlation, and Cosine | ||
Similarity Operations | ||
Version: 0.3-0 | ||
Description: Fast implementations of the co-operations: covariance, | ||
correlation, and cosine similarity. The implementations are | ||
fast and memory-efficient and their use is resolved | ||
automatically based on the input data, handled by R's S3 | ||
methods. Full descriptions of the algorithms and benchmarks | ||
are available in the package vignettes. | ||
License: BSD 2-clause License + file LICENSE | ||
Depends: R (>= 3.0.0) | ||
Enhances: slam (>= 0.1.32) | ||
Suggests: memuse, microbenchmark, rbenchmark, knitr, rmarkdown | ||
VignetteBuilder: knitr | ||
NeedsCompilation: yes | ||
ByteCompile: yes | ||
URL: https://github.com/wrathematics/coop | ||
BugReports: https://github.com/wrathematics/coop/issues | ||
Authors@R: c(person("Drew", "Schmidt", role=c("aut", "cre"), | ||
email="wrathematics@gmail.com"), | ||
person("Christian", "Heckendorf", role="ctb", comment="Caught some | ||
memory errors.")) | ||
Maintainer: Drew Schmidt <wrathematics@gmail.com> | ||
RoxygenNote: 5.0.1 | ||
Packaged: 2016-03-19 01:30:18 UTC; mschmid3 | ||
Author: Drew Schmidt [aut, cre], | ||
Christian Heckendorf [ctb] (Caught some memory errors.) | ||
Repository: CRAN | ||
Date/Publication: 2016-03-19 11:14:09 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2015-2016 | ||
COPYRIGHT HOLDER: Drew Schmidt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
7d2519c37090938279a19b32989a77ed *ChangeLog | ||
d8cafd76f72418f23c85895fb5d26129 *DESCRIPTION | ||
32151991ac40f1c2a881a4fde17f3991 *LICENSE | ||
90e0e517762c825e4768e4471affe473 *NAMESPACE | ||
94a06c3901cce73726005c8e21c472e1 *R/00_global.r | ||
6ed271ef68df504b0ef25eca111e91ad *R/coop-package.r | ||
da2b64f777ab5a3c0855deeec565c1d1 *R/cosine.r | ||
e2f21bd43dee9b5d4004f2616327473f *R/covar.r | ||
11c07968792a652c64892bda43fa5159 *R/pcor.r | ||
c4bca881ce1b7667433e2df8599ca50e *R/sparse_utils.r | ||
5e4e7156702aee5609ffe9759888985d *R/wrappers_dense.r | ||
044e1cbf63c2e231af913107d3d334d3 *R/wrappers_sparse.r | ||
b6f2175a98187b73d0436b124f52f26a *README.md | ||
a4fc931265a2ca7fe324a449271a0f57 *TODO | ||
bb5c899aa639e2e4f0ea2d723fd32d95 *build/vignette.rds | ||
166871e57ef5ed7c9461ec7827b88f14 *cleanup | ||
0ac65c575d2680a5df6a96c3c10b8dda *inst/CITATION | ||
8f9bfc2f5d48b4f31ab3bdb3f0b6bf45 *inst/benchmarks/cosine_dense.r | ||
fcadfd066a6719865305f9e9aedd86ab *inst/benchmarks/cosine_lsa.r | ||
f85f416e87de8c38e2808fc8b940df01 *inst/benchmarks/cosine_sparse.r | ||
d034a24811e8b0f1ce330b9c7887007e *inst/benchmarks/covar_dense.r | ||
888bf1c07127c388735c255052205e21 *inst/benchmarks/pcor_dense.r | ||
1b069a3d12f52d7e28aa56cb51603b6f *inst/doc/algos.R | ||
d699fe7dd9dddf1482de5c1a3e08a602 *inst/doc/algos.Rmd | ||
6e753572885a26b9d5405807b3e9a2a0 *inst/doc/algos.html | ||
ab83b97be43df5847bed2667536676ec *inst/doc/coop.R | ||
de36632929b94b179244cde44e55a5a6 *inst/doc/coop.Rmd | ||
ee4614176438dc21ec8668ef225121e8 *inst/doc/coop.html | ||
94ed171866df7a7c74b9f41d4c21cb39 *man/coop-package.Rd | ||
750a4a0613590fbb5984064ce4405c2b *man/cosine.Rd | ||
7dd196243387c783418efe3e15ae79c6 *man/covar.Rd | ||
3fe9f0fb3dac7ba723166cf135b5cdb9 *man/pcor.Rd | ||
4af2b676cdaf23b229abc9009feab152 *man/sparsity.Rd | ||
189087126049e969612cebc6f8a59615 *src/Makevars | ||
801a9d0992b02697cea7c4c0ca0b4013 *src/coop.h | ||
9f1fde6c6d2d61d2e59e0417372f1d9e *src/dense.c | ||
11057898bd19e220e1456eee61841a30 *src/omp.h | ||
25c24be622d6760c37b155fc1d4f9cf2 *src/sparse.c | ||
80a7aedd0ca413f82275d18de922f60d *src/utils.c | ||
1282c80c02b99ce874d62b0f7dfb4fd7 *src/wrapper.c | ||
0c6739c1de4310c9c691054e3c852f67 *tests/dense_matrix.R | ||
1e3e7830c1338674932cfdf828925218 *tests/dense_vecvec.R | ||
a702c26776c186a2bce31dd9293d40bf *tests/sparse_matrix.R | ||
e8d9e924fc96fac47bf482bac1384aa3 *tests/sparse_utils.R | ||
d699fe7dd9dddf1482de5c1a3e08a602 *vignettes/algos.Rmd | ||
de36632929b94b179244cde44e55a5a6 *vignettes/coop.Rmd | ||
4e4aec951fee7e6a9c8deabf2adf0e32 *vignettes/include/coop.bib | ||
b9fe5ab2a2112614ca78779b7d10eafc *vignettes/include/custom.css | ||
42b936088db6e1da3b0e0d5f3699e901 *vignettes/include/headers.js | ||
415ce49f0c2624b998fb21c46d1c3557 *vignettes/include/ieee.csl | ||
578b1eda5f3370f0580d2d2ff7889db9 *vignettes/rebuild |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(cosine,default) | ||
S3method(cosine,matrix) | ||
S3method(cosine,simple_triplet_matrix) | ||
S3method(covar,default) | ||
S3method(covar,matrix) | ||
S3method(pcor,default) | ||
S3method(pcor,matrix) | ||
S3method(sparsity,matrix) | ||
S3method(sparsity,simple_triplet_matrix) | ||
export(cosine) | ||
export(covar) | ||
export(pcor) | ||
export(sparsity) | ||
useDynLib(coop, R_co_mat, R_co_vecvec, R_co_sparse, R_sparsity_int, R_sparsity_dbl) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
### Operation codes passed to the co_*() wrappers to select the computation: | ||
### CO_SIM by the cosine() methods, CO_ORR by the pcor() methods and | ||
### CO_VAR by the covar() methods. | ||
### Corresponds to values in the C wrapper source (listed as src/wrapper.c | ||
### in the package MD5 manifest). | ||
CO_SIM <- 1L | ||
CO_ORR <- 2L | ||
CO_VAR <- 3L | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#' Cooperation: A Package of Co-Operations | ||
#' | ||
#' @description | ||
#' Fast implementations of the co-operations: covariance, | ||
#' correlation, and cosine similarity. The implementations are | ||
#' fast and memory-efficient and their use is resolved | ||
#' automatically based on the input data, handled by R's S3 | ||
#' methods. Full descriptions of the algorithms and benchmarks | ||
#' are available in the package vignettes. | ||
#' | ||
#' Covariance and correlation should largely need no introduction. | ||
#' Cosine similarity is commonly needed in, for example, natural | ||
#' language processing, where the cosine similarity coefficients | ||
#' of all columns of a term-document or document-term matrix are | ||
#' needed. | ||
#' | ||
#' @section Implementation Details: | ||
#' Multiple storage schemes for the input data are accepted. | ||
#' For dense matrices, an ordinary R matrix input is accepted. | ||
#' For sparse matrices, a matrix in COO format, namely | ||
#' \code{simple_triplet_matrix} from the slam package, is accepted. | ||
#' | ||
#' The implementation for dense matrix inputs is dominated | ||
#' by a symmetric rank-k update via the BLAS subroutine \code{dsyrk}; | ||
#' see the package vignette for a discussion of the algorithm | ||
#' implementation and complexity. | ||
#' | ||
#' The implementation for two dense vector inputs is dominated by the | ||
#' product \code{t(x) \%*\% y} performed by the BLAS subroutine | ||
#' \code{dgemm} and the normalizing products \code{t(x) \%*\% x} and | ||
#' \code{t(y) \%*\% y}, each computed via the BLAS function \code{dsyrk}. | ||
#' | ||
#' @useDynLib coop, R_co_mat, R_co_vecvec, | ||
#' R_co_sparse, R_sparsity_int, R_sparsity_dbl | ||
#' | ||
#' @docType package | ||
#' @name coop-package | ||
#' @author Drew Schmidt | ||
#' @keywords package | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#' Cosine Similarity | ||
#' | ||
#' Compute the cosine similarity matrix efficiently. The function | ||
#' syntax and behavior are largely modeled after those of the | ||
#' \code{cosine()} function from the \code{lsa} package, although | ||
#' with a very different implementation. | ||
#' | ||
#' @details | ||
#' This is an S3 generic. Methods are provided for ordinary matrices | ||
#' and for \code{simple_triplet_matrix} objects; any other input is | ||
#' handled by the default method, which covers the two-vector case. | ||
#' See \code{?coop-package} for implementation details. | ||
#' | ||
#' @param x | ||
#' A numeric matrix or vector. | ||
#' @param y | ||
#' A vector (when \code{x} is a vector) or missing (blank) when | ||
#' \code{x} is a matrix. | ||
#' | ||
#' @return | ||
#' The \eqn{n\times n} matrix of all pair-wise vector cosine | ||
#' similarities of the columns. | ||
#' | ||
#' @examples | ||
#' x <- matrix(rnorm(10*3), 10, 3) | ||
#' | ||
#' coop::cosine(x) | ||
#' coop::cosine(x[, 1], x[, 2]) | ||
#' | ||
#' @author Drew Schmidt | ||
#' @seealso \code{\link{sparsity}} | ||
#' @export | ||
cosine <- function(x, y) UseMethod("cosine") | ||
|
||
|
||
|
||
#' @export | ||
cosine.matrix <- function(x, y) | ||
{ | ||
# Dense-matrix method: hands x and y to co_matrix() together with the | ||
# CO_SIM operation code. co_matrix() is defined outside this view. | ||
co_matrix(x, y, CO_SIM) | ||
} | ||
|
||
|
||
|
||
#' @export | ||
cosine.default <- function(x, y) | ||
{ | ||
# Fallback method for inputs without a more specific method (e.g. plain | ||
# vectors): hands x and y to co_vecvec() with the CO_SIM operation code. | ||
# co_vecvec() is defined outside this view. | ||
co_vecvec(x, y, CO_SIM) | ||
} | ||
|
||
|
||
|
||
#' @export | ||
cosine.simple_triplet_matrix <- function(x, y) | ||
{ | ||
# Sparse method for simple_triplet_matrix inputs: hands x and y to | ||
# co_sparse(), passing the CO_SIM operation code by name as 'type'. | ||
# co_sparse() is defined outside this view. | ||
co_sparse(x, y, type=CO_SIM) | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#' Covariance
#'
#' An optimized, efficient implementation for computing covariance.
#'
#' @details
#' This is an S3 generic with a method for ordinary matrices and a
#' default method that covers the two-vector case.
#' See \code{?coop-package} for implementation details.
#'
#' @param x
#' A numeric matrix or vector.
#' @param y
#' A vector (when \code{x} is a vector) or missing (blank) when
#' \code{x} is a matrix.
#'
#' @return
#' The covariance matrix.
#'
#' @examples
#' x <- matrix(rnorm(10*3), 10, 3)
#'
#' coop::covar(x)
#' coop::covar(x[, 1], x[, 2])
#'
#' @author Drew Schmidt
#' @seealso \code{\link{cosine}}
#' @export
covar <- function(x, y) UseMethod("covar")
|
||
|
||
|
||
#' @export | ||
covar.matrix <- function(x, y) | ||
{ | ||
# Dense-matrix method: hands x and y to co_matrix() together with the | ||
# CO_VAR operation code. co_matrix() is defined outside this view. | ||
co_matrix(x, y, CO_VAR) | ||
} | ||
|
||
|
||
|
||
#' @export | ||
covar.default <- function(x, y) | ||
{ | ||
# Fallback method for inputs without a more specific method (e.g. plain | ||
# vectors): hands x and y to co_vecvec() with the CO_VAR operation code. | ||
# co_vecvec() is defined outside this view. | ||
co_vecvec(x, y, CO_VAR) | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#' Pearson Correlation
#'
#' An optimized, efficient implementation for computing the Pearson
#' correlation.
#'
#' @details
#' This is an S3 generic with a method for ordinary matrices and a
#' default method that covers the two-vector case.
#' See \code{?coop-package} for implementation details.
#'
#' @param x
#' A numeric matrix or vector.
#' @param y
#' A vector (when \code{x} is a vector) or missing (blank) when
#' \code{x} is a matrix.
#'
#' @return
#' The Pearson correlation matrix.
#'
#' @examples
#' x <- matrix(rnorm(10*3), 10, 3)
#'
#' coop::pcor(x)
#' coop::pcor(x[, 1], x[, 2])
#'
#' @author Drew Schmidt
#' @seealso \code{\link{cosine}}
#' @export
pcor <- function(x, y) UseMethod("pcor")
|
||
|
||
|
||
#' @export | ||
pcor.matrix <- function(x, y) | ||
{ | ||
# Dense-matrix method: hands x and y to co_matrix() together with the | ||
# CO_ORR operation code. co_matrix() is defined outside this view. | ||
co_matrix(x, y, CO_ORR) | ||
} | ||
|
||
|
||
|
||
#' @export | ||
pcor.default <- function(x, y) | ||
{ | ||
# Fallback method for inputs without a more specific method (e.g. plain | ||
# vectors): hands x and y to co_vecvec() with the CO_ORR operation code. | ||
# co_vecvec() is defined outside this view. | ||
co_vecvec(x, y, CO_ORR) | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#' Sparsity | ||
#' | ||
#' Show the sparsity (as a count or proportion) of a matrix. For | ||
#' example, .99 sparsity means 99\% of the values are zero. | ||
#' Similarly, a sparsity of 0 means the matrix is fully dense. | ||
#' | ||
#' @details | ||
#' The implementation is very efficient for dense matrices. For | ||
#' sparse triplet matrices, the count is trivial: it is derived from | ||
#' the number of stored values, so any explicitly stored zeros are | ||
#' treated as non-zero. | ||
#' | ||
#' @param x | ||
#' The matrix, stored as an ordinary R matrix or as a "simple | ||
#' triplet matrix" (from the slam package). | ||
#' @param proportion | ||
#' Logical; should a proportion or a count be returned? Defaults to | ||
#' \code{TRUE} (proportion). | ||
#' | ||
#' @return | ||
#' The sparsity of the input matrix, as a proportion or a count. | ||
#' | ||
#' @examples | ||
#' ## Completely sparse matrix | ||
#' x <- matrix(0, 10, 10) | ||
#' coop::sparsity(x) | ||
#' | ||
#' ## 15\% density / 85\% sparsity | ||
#' x[sample(length(x), size=15)] <- 1 | ||
#' coop::sparsity(x) | ||
#' | ||
#' @author Drew Schmidt | ||
#' @export | ||
sparsity <- function(x, proportion=TRUE) UseMethod("sparsity") | ||
|
||
|
||
|
||
#' @export | ||
sparsity.matrix <- function(x, proportion=TRUE) | ||
{ | ||
# Dense-matrix method. The count comes from a native routine chosen by the | ||
# storage mode of x: R_sparsity_int for integer, R_sparsity_dbl for double. | ||
# Any other storage mode (e.g. character) is rejected with an error. | ||
if (is.integer(x)) | ||
count <- .Call(R_sparsity_int, x) | ||
else if (is.double(x)) | ||
# NOTE(review): tol is presumably the magnitude below which a double is | ||
# treated as zero -- confirm against the C source. | ||
count <- .Call(R_sparsity_dbl, x, tol=1e-10) | ||
else | ||
stop("matrix 'x' must be numeric.") | ||
|
||
# Either scale the count by the number of cells or return it unchanged. | ||
if (proportion) | ||
count / nrow(x) / ncol(x) | ||
else | ||
count | ||
} | ||
|
||
|
||
|
||
#' @export
sparsity.simple_triplet_matrix <- function(x, proportion=TRUE)
{
  # Number of explicitly stored entries; every other cell is an implicit
  # zero. Explicitly stored zeros, if any, are counted as non-zero.
  stored <- length(x$v)

  if (proportion)
    return(1 - stored / nrow(x) / ncol(x))

  # Multiply in double precision: nrow(x)*ncol(x) on integers overflows to
  # NA (with a warning) once the matrix has more than .Machine$integer.max
  # cells, which is routine for sparse inputs.
  zeros <- as.double(nrow(x)) * as.double(ncol(x)) - stored

  # Keep the historical integer return type whenever the count fits in one.
  if (zeros <= .Machine$integer.max)
    as.integer(zeros)
  else
    zeros
}
|
||
|
||
|
||
# Sparse matrix generator; used only for tests.
# Builds an ordinary (dense-stored) m-by-n matrix of zeros and sets a random
# subset of its cells to the constant 10.
# @param m,n Dimensions (rows, cols)
# @param prop Proportion of non-zeros.
# @return An m-by-n numeric matrix with round(prop*m*n) cells equal to 10.
dense_stored_sparse_mat <- function(m, n, prop)
{
  # Round to the nearest whole count. sample() truncates a fractional size,
  # so floating point error in the product (e.g. 0.57*100 evaluates to
  # 56.99999999999999) would otherwise drop one non-zero.
  size <- round(prop*m*n)
  x <- matrix(0, m, n)
  x[sample(m*n, size=size)] <- 10 # alternative fill: rnorm(size)
  x
}
Oops, something went wrong.