In [1]:
library(Seurat)
library(Matrix)
library(reticulate)
library(glmpca)
library(peakRAM)
library(scry)
library(SingleCellExperiment)
library(glmGamPoi)

“rgeos: versions of GEOS runtime 3.9.1-CAPI-1.14.2
and GEOS at installation 3.9.1dev-CAPI-1.14.1differ”
Attaching SeuratObject

Attaching sp

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, r

# Load data

In [2]:
# Read the zhengmix8eq count matrix (MatrixMarket file) and its metadata
# table; the first CSV column supplies the metadata row names.
mtx <- readMM('../datasets/zhengmix8eq/mat.mtx')
meta <- read.csv('../datasets/zhengmix8eq/meta.csv', row.names=1)
# Fail early with a clear message if matrix columns and metadata rows cannot
# be paired one-to-one before the names are copied over.
stopifnot(ncol(mtx) == nrow(meta))
colnames(mtx) <- rownames(meta)
# Label features by their 1-based row index. seq_len() stays correct (empty)
# for a zero-row matrix, where 1:nrow(mtx) would produce c(1, 0).
rownames(mtx) <- seq_len(nrow(mtx))

# Seurat pipeline - log normalization

In [3]:
# Seurat workflow based on NormalizeData(): normalize, select 2000 variable
# features, scale, and run PCA. The PCA embeddings and the standardized
# per-feature variance are written to CSV as side effects.
#
# obj: a Seurat object holding the raw counts.
# Returns the processed Seurat object.
benchmark_seurat_log <- function(obj){
    obj <- NormalizeData(obj)
    obj <- FindVariableFeatures(obj, nfeatures=2000, verbose=FALSE)
    obj <- ScaleData(obj, verbose=FALSE)
    obj <- RunPCA(obj, verbose=FALSE)

    # Persist the cell embeddings produced by RunPCA().
    pca_coords <- Embeddings(obj)
    write.csv(pca_coords, file='pca_coord/zhengmix8eq/pca_log.csv')

    # Persist the standardized variance column of the variable-feature table.
    std_variance <- HVFInfo(obj)$variance.standardized
    write.csv(std_variance, file='feature_var/zhengmix8eq/var_log.csv')

    obj
}

In [4]:
# Build a fresh Seurat object from the count matrix and metadata, then run the
# log pipeline under peakRAM. The processed object is assigned to `x` inside
# the measured expression; `result` holds peakRAM's measurement table.
obj <- CreateSeuratObject(mtx, meta.data=meta)
result <- peakRAM(x <- benchmark_seurat_log(obj))

In [5]:
# Show only the elapsed-time and peak-RAM columns of the peakRAM table.
result[c("Elapsed_Time_sec", "Peak_RAM_Used_MiB")]

Unnamed: 0_level_0,Elapsed_Time_sec,Peak_RAM_Used_MiB
Unnamed: 0_level_1,<dbl>,<dbl>
1,8.047,228.4


# Seurat pipeline - SCTransform (sct)

In [6]:
# SCTransform-based Seurat workflow: run SCTransform (glmGamPoi backend,
# 2000 variable features) followed by PCA. The PCA embeddings and the
# per-feature residual variance are written to CSV as side effects.
#
# obj: a Seurat object holding the raw counts.
# Returns the processed Seurat object.
benchmark_seurat_sct <- function(obj){
    # One zero-initialized entry per input feature, keyed by the object's own
    # feature names. Keying by rownames(obj) (rather than assuming the names
    # are "1".."n") guarantees the name-based assignment below fills existing
    # entries instead of silently appending new ones. Features that do not
    # appear in the SCT variance table keep the value 0.
    var_feature <- rep(0, nrow(obj))
    names(var_feature) <- rownames(obj)

    obj <- SCTransform(obj, variable.features.n=2000, verbose=FALSE, method='glmGamPoi')
    obj <- RunPCA(obj, verbose=FALSE)
    write.csv(Embeddings(obj), 'pca_coord/zhengmix8eq/pca_sct.csv')

    # Fetch the 'sct' variance table once so that the feature names and the
    # residual variances are guaranteed to come from the same table.
    hvf_sct <- HVFInfo(obj, 'sct')
    var_feature[rownames(hvf_sct)] <- hvf_sct$residual_variance
    write.csv(var_feature, 'feature_var/zhengmix8eq/var_sct.csv')
    obj
}

In [7]:
# Rebuild the Seurat object from the count matrix and metadata so the sct
# pipeline starts from an unprocessed object, then run it under peakRAM.
# `x` receives the processed object; `result` holds the measurement table.
obj <- CreateSeuratObject(mtx, meta.data=meta)
result <- peakRAM(x <- benchmark_seurat_sct(obj))

In [8]:
# Show only the elapsed-time and peak-RAM columns of the peakRAM table.
result[c("Elapsed_Time_sec", "Peak_RAM_Used_MiB")]

Unnamed: 0_level_0,Elapsed_Time_sec,Peak_RAM_Used_MiB
Unnamed: 0_level_1,<dbl>,<dbl>
1,67.841,1763.7


# scry pipeline - deviance feature selection + glmpca

In [9]:
# scry workflow: compute per-feature binomial deviance, keep the (up to) 2000
# highest-deviance features, and fit GLMPCA on that subset. The
# mean-normalized deviances and the GLM-PCA factors are written to CSV as
# side effects.
#
# obj: a SingleCellExperiment with a "counts" assay.
# Returns the full-feature object carrying the deviance in its rowData. The
# GLM-PCA fit lives only in the local subset and in the CSV written below.
benchmark_scry_glmpca <- function(obj){
    # sorted=FALSE keeps the features in their input order.
    obj <- devianceFeatureSelection(obj, assay="counts", sorted=FALSE)
    feature_var <- rowData(obj)$binomial_deviance
    # Deviance scaled by its mean (NAs ignored when computing the mean).
    write.csv(feature_var/mean(feature_var, na.rm=TRUE), 'feature_var/zhengmix8eq/var_scry.csv')
    # Indices of the highest-deviance features. head() caps the selection at
    # the number of available features, whereas [1:2000] would yield NA
    # indices (and a failing subset) for inputs with fewer than 2000 rows.
    select_features <- head(order(-feature_var), 2000)
    obj2 <- obj[select_features,]
    # 20 is passed as GLMPCA's second positional argument.
    obj2 <- GLMPCA(obj2, 20, assay="counts", minibatch="stochastic")
    write.csv(metadata(obj2)$glmpca$factors, 'pca_coord/zhengmix8eq/pca_glmpca.csv')
    obj
}

In [10]:
# Wrap the count matrix in a SingleCellExperiment under the "counts" assay
# name, then run the scry/GLM-PCA pipeline under peakRAM. `x` receives the
# function's return value; `result` holds the measurement table.
obj <- SingleCellExperiment(list(counts=mtx))
result <- peakRAM(x <- benchmark_scry_glmpca(obj))