Skip to content

Commit

Permalink
Merge pull request #335 from zhewa/master
Browse files Browse the repository at this point in the history
Sparse Matrix support and refactorings
  • Loading branch information
zhewa committed May 8, 2021
2 parents a04c896 + 50c69ef commit fd80a49
Show file tree
Hide file tree
Showing 101 changed files with 2,625 additions and 7,558 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Expand Up @@ -15,6 +15,10 @@
# produced vignettes
vignettes/*.html
vignettes/*.pdf
vignettes/*.log
inst/rmarkdown/*.html
inst/rmarkdown/*.rds
inst/rmarkdown/*.csv
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth
# knitr and R markdown default cache directories
Expand All @@ -29,6 +33,8 @@ celda.Rproj
src/*.o
src/*.dll
src/*.so
src-i386/*
src-x64/*
etc/*
# Celda log files with default prefix
Celda_chain.*log.txt
Expand All @@ -37,4 +43,5 @@ doc
Meta
.Rprofile
renv/
renv.lock

10 changes: 5 additions & 5 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: celda
Title: CEllular Latent Dirichlet Allocation
Version: 1.7.7
Version: 1.7.9
Authors@R: c(person("Joshua", "Campbell", email = "camp@bu.edu",
role = c("aut", "cre")),
person("Sean", "Corbett", email = "scorbett@bu.edu", role = c("aut")),
Expand All @@ -21,12 +21,12 @@ Imports: plyr, foreach, ggplot2, RColorBrewer, grid, scales, gtable,
grDevices, graphics, matrixStats, doParallel, digest, methods,
reshape2, S4Vectors, data.table,
Rcpp, RcppEigen, uwot, enrichR, stringi, SummarizedExperiment,
MCMCprecision, ggrepel, Rtsne, withr, dendextend, ggdendro,
pROC, scater (>= 1.14.4), scran, SingleCellExperiment, dbscan,
DelayedArray, Seurat, stringr, Matrix, ComplexHeatmap, multipanelfigure,
MCMCprecision, ggrepel, Rtsne, withr,
scater (>= 1.14.4), scran, SingleCellExperiment, dbscan,
DelayedArray, stringr, Matrix, ComplexHeatmap, multipanelfigure,
circlize
Suggests: testthat, knitr, roxygen2, rmarkdown, biomaRt, covr,
BiocManager, BiocStyle, M3DExampleData, TENxPBMCData
BiocManager, BiocStyle, M3DExampleData, TENxPBMCData, singleCellTK
LinkingTo: Rcpp, RcppEigen
License: MIT + file LICENSE
Encoding: UTF-8
Expand Down
15 changes: 8 additions & 7 deletions NAMESPACE
Expand Up @@ -30,9 +30,7 @@ export(distinctColors)
export(factorizeMatrix)
export(featureModuleLookup)
export(featureModuleTable)
export(findMarkersTree)
export(geneSetEnrich)
export(getDecisions)
export(logLikelihood)
export(logLikelihoodHistory)
export(matrixNames)
Expand All @@ -49,10 +47,8 @@ export(plotDimReduceFeature)
export(plotDimReduceGrid)
export(plotDimReduceModule)
export(plotGridSearchPerplexity)
export(plotGridSearchPerplexityDiff)
export(plotHeatmap)
export(plotMarkerDendro)
export(plotMarkerHeatmap)
export(plotRPC)
export(recodeClusterY)
export(recodeClusterZ)
export(recursiveSplitCell)
Expand Down Expand Up @@ -97,7 +93,6 @@ exportMethods(decontX)
exportMethods(decontXcounts)
exportMethods(factorizeMatrix)
exportMethods(featureModuleLookup)
exportMethods(findMarkersTree)
exportMethods(geneSetEnrich)
exportMethods(logLikelihood)
exportMethods(logLikelihoodHistory)
Expand All @@ -111,7 +106,7 @@ exportMethods(plotDimReduceFeature)
exportMethods(plotDimReduceGrid)
exportMethods(plotDimReduceModule)
exportMethods(plotGridSearchPerplexity)
exportMethods(plotGridSearchPerplexityDiff)
exportMethods(plotRPC)
exportMethods(recursiveSplitCell)
exportMethods(recursiveSplitModule)
exportMethods(reorderCelda)
Expand All @@ -131,6 +126,9 @@ import(graphics)
import(grid)
import(uwot)
importFrom(MCMCprecision,fit_dirichlet)
importFrom(Matrix,colSums)
importFrom(Matrix,rowSums)
importFrom(Matrix,t)
importFrom(RColorBrewer,brewer.pal)
importFrom(Rtsne,Rtsne)
importFrom(data.table,as.data.table)
Expand Down Expand Up @@ -160,10 +158,13 @@ importFrom(scales,dscale)
importFrom(scales,hue_pal)
importFrom(stringi,stri_list2matrix)
importFrom(withr,with_seed)
importMethodsFrom(Matrix,"%*%")
useDynLib(celda,"_colSumByGroup")
useDynLib(celda,"_colSumByGroupChange")
useDynLib(celda,"_colSumByGroupChange_numeric")
useDynLib(celda,"_colSumByGroup_numeric")
useDynLib(celda,"_perplexityG")
useDynLib(celda,"_rowSumByGroup")
useDynLib(celda,"_rowSumByGroupChange")
useDynLib(celda,"_rowSumByGroupChange_numeric")
useDynLib(celda,"_rowSumByGroup_numeric")
25 changes: 25 additions & 0 deletions R/RcppExports.R
Expand Up @@ -42,6 +42,15 @@ eigenMatMultInt <- function(A, B) {
.Call('_celda_eigenMatMultInt', PACKAGE = 'celda', A, B)
}

#' Fast matrix multiplication for double x double
#'
#' @param A a double matrix
#' @param B a double matrix
#' @return A double matrix representing the product of A and B
eigenMatMultNumeric <- function(A, B) {
# Forwards A and B unchanged to the native routine
# '_celda_eigenMatMultNumeric' in package 'celda' and returns its result.
# NOTE(review): parameter/return types above follow the "double x double"
# title; confirm against the C++ signature. This file looks like Rcpp
# generated glue, so the matching roxygen text in the C++ source should be
# corrected as well or this fix will be overwritten on regeneration.
.Call('_celda_eigenMatMultNumeric', PACKAGE = 'celda', A, B)
}

#' Fast normalization for numeric matrix
#'
#' @param R_counts An integer matrix
Expand Down Expand Up @@ -77,3 +86,19 @@ nonzero <- function(R_counts) {
.Call('_celda_nonzero', PACKAGE = 'celda', R_counts)
}

# Thin R-level wrapper: passes `counts`, `group` and `K` unchanged, in that
# order, to the native routine '_celda_colSumByGroupSparse' in package 'celda'
# and returns whatever that routine returns.
# NOTE(review): presumably `counts` is a sparse count matrix, `group` holds
# per-column group labels and `K` is the number of groups -- confirm against
# the C++ implementation.
colSumByGroupSparse <- function(counts, group, K) {
.Call('_celda_colSumByGroupSparse', PACKAGE = 'celda', counts, group, K)
}

# Thin R-level wrapper: passes `counts`, `group` and `L` unchanged, in that
# order, to the native routine '_celda_rowSumByGroupSparse' in package 'celda'
# and returns whatever that routine returns.
# NOTE(review): presumably `counts` is a sparse count matrix, `group` holds
# per-row group labels and `L` is the number of groups -- confirm against the
# C++ implementation.
rowSumByGroupSparse <- function(counts, group, L) {
.Call('_celda_rowSumByGroupSparse', PACKAGE = 'celda', counts, group, L)
}

# Thin R-level wrapper: passes `counts`, `px`, `group`, `pgroup` and `K`
# unchanged, in that order, to the native routine
# '_celda_colSumByGroupChangeSparse' in package 'celda' and returns its result.
# NOTE(review): presumably `px` is a previously computed group-sum result and
# `pgroup` the previous group labels, so only changed assignments are
# re-summed -- confirm against the C++ implementation, including whether `px`
# is modified in place.
colSumByGroupChangeSparse <- function(counts, px, group, pgroup, K) {
.Call('_celda_colSumByGroupChangeSparse', PACKAGE = 'celda', counts, px, group, pgroup, K)
}

# Thin R-level wrapper: passes `counts`, `px`, `group`, `pgroup` and `L`
# unchanged, in that order, to the native routine
# '_celda_rowSumByGroupChangeSparse' in package 'celda' and returns its result.
# NOTE(review): presumably `px` is a previously computed group-sum result and
# `pgroup` the previous group labels, so only changed assignments are
# re-summed -- confirm against the C++ implementation, including whether `px`
# is modified in place.
rowSumByGroupChangeSparse <- function(counts, px, group, pgroup, L) {
.Call('_celda_rowSumByGroupChangeSparse', PACKAGE = 'celda', counts, px, group, pgroup, L)
}

39 changes: 26 additions & 13 deletions R/accessors.R
Expand Up @@ -15,7 +15,6 @@
#' to use. Default "featureSubset".
#' @param value Character vector of cell cluster labels for replacements. Works
#' only if \code{x} is a \linkS4class{SingleCellExperiment} object.
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return One of
#' \itemize{
#' \item Character vector if \code{x} is a
Expand All @@ -26,7 +25,7 @@
#' Models) and/or feature module labels (for celda_G and celdaCG Models).}
#' @export
setGeneric("celdaClusters",
function(x, ...) {
function(x, altExpName = "featureSubset") {
standardGeneric("celdaClusters")
})

Expand Down Expand Up @@ -68,9 +67,13 @@ setGeneric("celdaClusters<-",

#' @rdname celdaClusters
#' @export
setReplaceMethod("celdaClusters", signature(x = "SingleCellExperiment"),
setMethod("celdaClusters<-", signature(x = "SingleCellExperiment"),
function(x, altExpName = "featureSubset", value) {
altExp <- SingleCellExperiment::altExp(x, altExpName)
if (!is.factor(value)) {
message("Cluster labels are converted to factors.")
value <- as.factor(value)
}
SummarizedExperiment::colData(altExp)$celda_cell_cluster <- value
SingleCellExperiment::altExp(x, altExpName) <- altExp
return(x)
Expand Down Expand Up @@ -122,9 +125,13 @@ setGeneric("celdaModules<-",

#' @rdname celdaModules
#' @export
setReplaceMethod("celdaModules", signature(sce = "SingleCellExperiment"),
setMethod("celdaModules<-", signature(sce = "SingleCellExperiment"),
function(sce, altExpName = "featureSubset", value) {
altExp <- SingleCellExperiment::altExp(sce, altExpName)
if (!is.factor(value)) {
message("Module labels are converted to factors.")
value <- as.factor(value)
}
SummarizedExperiment::rowData(altExp)$celda_feature_module <- value
SingleCellExperiment::altExp(sce, altExpName) <- altExp
return(sce)
Expand All @@ -145,12 +152,11 @@ setReplaceMethod("celdaModules", signature(sce = "SingleCellExperiment"),
#' to use. Default "featureSubset".
#' @param value Character vector of sample labels for replacements. Works
#' only if \code{x} is a \linkS4class{SingleCellExperiment} object.
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return Character vector. Contains the sample labels provided at model
#' creation, or those automatically generated by celda.
#' @export
setGeneric("sampleLabel",
function(x, ...) {
function(x, altExpName = "featureSubset") {
standardGeneric("sampleLabel")
})

Expand All @@ -177,9 +183,13 @@ setGeneric("sampleLabel<-",
)
#' @rdname sampleLabel
#' @export
setReplaceMethod("sampleLabel", signature(x = "SingleCellExperiment"),
setMethod("sampleLabel<-", signature(x = "SingleCellExperiment"),
function(x, altExpName = "featureSubset", value) {
altExp <- SingleCellExperiment::altExp(x, altExpName)
if (!is.factor(value)) {
message("Sample labels are converted to factors.")
value <- as.factor(value)
}
SummarizedExperiment::colData(altExp)$celda_sample_label <- value
SingleCellExperiment::altExp(x, altExpName) <- altExp
return(x)
Expand Down Expand Up @@ -265,13 +275,12 @@ setMethod("matrixNames",
#' \code{celdaList}.
#' @param altExpName The name for the \link{altExp} slot
#' to use. Default "featureSubset".
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return Data Frame. Contains details on the various K/L parameters, chain
#' parameters, seed, and final log-likelihoods derived for each model in the
#' provided celdaList.
#' @export
setGeneric("runParams",
function(x, ...) {
function(x, altExpName = "featureSubset") {
standardGeneric("runParams")
}
)
Expand Down Expand Up @@ -312,13 +321,12 @@ setMethod("runParams",
#' \code{celdaList}.
#' @param altExpName The name for the \link{altExp} slot
#' to use. Default "featureSubset".
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return List. Contains one celdaModel object for each of the parameters
#' specified in \code{runParams(x)}.
#' @export
setGeneric(
"resList",
function(x, ...) {
function(x, altExpName = "featureSubset") {
standardGeneric("resList")
}
)
Expand Down Expand Up @@ -359,17 +367,18 @@ setMethod("resList",
#' returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
#' @param altExpName The name for the \link{altExp} slot
#' to use. Default "featureSubset".
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return Character. The celda model. Can be one of "celda_C", "celda_G", or
#' "celda_CG".
#' @examples
#' data(sceCeldaCG)
#' celdaModel(sceCeldaCG)
#' @export
setGeneric("celdaModel",
function(sce, ...) {
function(sce, altExpName = "featureSubset") {
standardGeneric("celdaModel")
})


#' @rdname celdaModel
#' @export
setMethod("celdaModel",
Expand Down Expand Up @@ -418,6 +427,8 @@ setGeneric(
standardGeneric("celdaPerplexity")
}
)


#' @title Get perplexity for every model in a celdaList
#' @description Returns perplexity for each model in a celdaList as calculated
#' by `perplexity()`.
Expand Down Expand Up @@ -452,6 +463,8 @@ setGeneric(
standardGeneric("countChecksum")
}
)


#' @title Get the MD5 hash of the count matrix from the celdaList
#' @description Returns the MD5 hash of the count matrix used to generate the
#' celdaList.
Expand Down
30 changes: 23 additions & 7 deletions R/celdaGridSearch.R
Expand Up @@ -46,7 +46,6 @@
#' @return A \linkS4class{SingleCellExperiment} object. Function
#' parameter settings and celda model results are stored in the
#' \link{metadata} \code{"celda_grid_search"} slot.
#' @param ... Ignored. Placeholder to prevent check warning.
#' @seealso \link{celda_G} for feature clustering, \link{celda_C} for
#' clustering of cells, and \link{celda_CG} for simultaneous clustering of
#' features and cells. \link{subsetCeldaList} can subset the \code{celdaList}
Expand All @@ -56,7 +55,7 @@
#' @importFrom doParallel registerDoParallel
#' @importFrom methods is
#' @examples
#' \dontrun{
#' \donttest{
#' data(celdaCGSim)
#' ## Run various combinations of parameters with 'celdaGridSearch'
#' celdaCGGridSearchRes <- celdaGridSearch(celdaCGSim$counts,
Expand All @@ -68,7 +67,22 @@
#' cores = 1)
#' }
#' @export
setGeneric("celdaGridSearch", function(x, ...) {
setGeneric(
    name = "celdaGridSearch",
    def = function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        model,
        paramsTest,
        paramsFixed = NULL,
        maxIter = 200,
        nchains = 3,
        cores = 1,
        bestOnly = TRUE,
        seed = 12345,
        perplexity = TRUE,
        verbose = TRUE,
        logfilePrefix = "Celda") {

        # S4 generic: the full argument list is spelled out here (no `...`),
        # so every method shares these formals and defaults. The body only
        # hands off to method dispatch.
        standardGeneric("celdaGridSearch")
    }
)


Expand Down Expand Up @@ -442,7 +456,6 @@ setMethod("celdaGridSearch",
#' models in list \code{"celda_grid_search"} in \code{metadata(x)}.
#' @param altExpName The name for the \link{altExp} slot
#' to use. Default "featureSubset".
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return One of
#' \itemize{
#' \item A new \linkS4class{SingleCellExperiment} object containing
Expand All @@ -464,7 +477,9 @@ setMethod("celdaGridSearch",
#' chains in parallel. \link{selectBestModel} can get the best model for each
#' combination of parameters.
#' @export
setGeneric("subsetCeldaList", function(x, ...) {
setGeneric(
    name = "subsetCeldaList",
    def = function(x, params, altExpName = "featureSubset") {
        # S4 generic with explicit formals (no `...`); the body only hands
        # off to method dispatch.
        standardGeneric("subsetCeldaList")
    }
)


Expand Down Expand Up @@ -583,7 +598,6 @@ setMethod("subsetCeldaList",
#' corresponding celda model object.
#' @param altExpName The name for the \link{altExp} slot
#' to use. Default "featureSubset".
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return One of
#' \itemize{
#' \item A new \linkS4class{SingleCellExperiment} object containing
Expand All @@ -602,7 +616,9 @@ setMethod("subsetCeldaList",
#' instead of a \code{celdaList} object.}
#' @seealso \link{celdaGridSearch} \link{subsetCeldaList}
#' @export
setGeneric("selectBestModel", function(x, ...) {
setGeneric(
    name = "selectBestModel",
    def = function(x, asList = FALSE, altExpName = "featureSubset") {
        # S4 generic with explicit formals (no `...`); the body only hands
        # off to method dispatch.
        standardGeneric("selectBestModel")
    }
)


Expand Down

0 comments on commit fd80a49

Please sign in to comment.