-
Notifications
You must be signed in to change notification settings - Fork 74
/
dropletUtils_barcodeRank.R
143 lines (126 loc) · 6.04 KB
/
dropletUtils_barcodeRank.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
.runBarcodeRankDrops <- function(barcode.matrix, lower = lower,
fit.bounds = fit.bounds,
df = df) {
## Convert to sparse matrix if not already in that format
barcode.matrix <- .convertToMatrix(barcode.matrix)
output <- DropletUtils::barcodeRanks(m = barcode.matrix, lower = lower,
fit.bounds = fit.bounds,
df = df)
knee.ix <- as.integer(output@listData$total >=
S4Vectors::metadata(output)$knee)
inflection.ix <- as.integer(output@listData$total >=
S4Vectors::metadata(output)$inflection)
rank.ix <- as.integer(output$rank)
total.ix <- as.integer(output$total)
fitted.ix <- as.integer(output$fitted)
result <- cbind(knee.ix, inflection.ix, rank.ix, total.ix, fitted.ix)
colnames(result) <- c("dropletUtils_barcodeRank_knee",
"dropletUtils_barcodeRank_inflection",
"dropletUtils_barcodeRank_rank",
"dropletUtils_barcodeRank_total",
"dropletUtils_barcodeRank_fitted")
result.list <- list(result,
S4Vectors::metadata(output)$knee,
S4Vectors::metadata(output)$inflection)
names(result.list) <- c("matrix", "knee", "inflection")
return(result.list)
}
#' @title Identify empty droplets using \link[DropletUtils]{barcodeRanks}.
#' @description Run \link[DropletUtils]{barcodeRanks} on a count matrix
#' provided in a \linkS4class{SingleCellExperiment} object. Distinguish between
#' droplets containing cells and ambient RNA in a droplet-based single-cell RNA
#' sequencing experiment.
#' @param inSCE A \linkS4class{SingleCellExperiment} object. Must contain a raw
#' counts matrix before empty droplets have been removed.
#' @param sample Character vector or colData variable name. Indicates which
#' sample each cell belongs to. Default \code{NULL}.
#' @param useAssay A string specifying which assay in the SCE to use. Default
#' \code{"counts"}
#' @param lower See \link[DropletUtils]{barcodeRanks} for more information.
#' Default \code{100}.
#' @param fitBounds See \link[DropletUtils]{barcodeRanks} for more information.
#' Default \code{NULL}.
#' @param df See \link[DropletUtils]{barcodeRanks} for more information. Default
#' \code{20}.
#' @return A \linkS4class{SingleCellExperiment} object with the
#' \link[DropletUtils]{barcodeRanks} output table appended to the
#' \link{colData} slot. The columns include
#' \code{dropletUtils_BarcodeRank_Knee} and
#' \code{dropletUtils_barcodeRank_inflection}. Please refer to the documentation
#' of \link[DropletUtils]{barcodeRanks} for details.
#' @seealso \code{\link[DropletUtils]{barcodeRanks}},
#' \code{\link{runDropletQC}}, \code{\link{plotBarcodeRankDropsResults}}
#' @examples
#' data(scExample, package = "singleCellTK")
#' sce <- runBarcodeRankDrops(inSCE = sce)
#' @export
#' @importFrom SummarizedExperiment colData colData<- assay
runBarcodeRankDrops <- function(inSCE,
sample = NULL,
useAssay = "counts",
lower = 100,
fitBounds = NULL,
df = 20
) {
p <- paste0(date(), " ... Running 'barcodeRanks'")
message(p)
## Getting current arguments values
argsList <- mget(names(formals()),sys.frame(sys.nframe()))
argsList <- argsList[!names(argsList) %in% c("inSCE")]
argsList$packageVersion <- utils::packageDescription("DropletUtils")$Version
sample <- .manageCellVar(inSCE, var = sample)
if (is.null(sample)) {
sample <- rep(1, ncol(inSCE))
}
## Define result matrix for all samples
output <- S4Vectors::DataFrame(
row.names = colnames(inSCE),
dropletUtils_BarcodeRank_Knee = integer(ncol(inSCE)),
dropletUtils_BarcodeRank_Inflection = integer(ncol(inSCE))
)
## Loop through each sample and run barcodeRank
samples <- unique(sample)
for (s in samples) {
sceSampleInd <- sample == s
sceSample <- inSCE[, sceSampleInd]
## Define meta matrix for each subinSCE
metaOutput <- S4Vectors::DataFrame(
row.names = colnames(sceSample),
dropletUtils_barcodeRank_rank = integer(ncol(sceSample)),
dropletUtils_barcodeRank_total = integer(ncol(sceSample)),
dropletUtils_barcodeRank_fitted = integer(ncol(sceSample)),
dropletUtils_barcodeRank_knee = integer(ncol(sceSample)),
dropletUtils_barcodeRank_inflection = integer(ncol(sceSample))
)
metaOutput$sample <- colData(sceSample)[["Sample"]]
mat <- assay(sceSample, i = useAssay)
result <- .runBarcodeRankDrops(barcode.matrix = mat, lower = lower,
fit.bounds = fitBounds,
df = df)
result.matrix <- result$matrix
output[sceSampleInd, ] <-
result.matrix[, c("dropletUtils_barcodeRank_knee",
"dropletUtils_barcodeRank_inflection")]
metaCols <- c("dropletUtils_barcodeRank_rank",
"dropletUtils_barcodeRank_total",
"dropletUtils_barcodeRank_fitted")
metaOutput[, metaCols] <- result.matrix[, metaCols]
metaOutput[,"dropletUtils_barcodeRank_knee"] <- rep(result$knee,
sum(sceSampleInd))
metaOutput[,"dropletUtils_barcodeRank_inflection"] <- rep(result$inflection,
sum(sceSampleInd))
# Remove duplicated Rank
metaOutput <-
metaOutput[!duplicated(metaOutput$dropletUtils_barcodeRank_rank), ]
if (!identical(samples, 1)) {
S4Vectors::metadata(inSCE)$sctk$runBarcodeRankDrops[[s]] <-
list(metaOutput = metaOutput, argsList = argsList)
}
}
if (identical(samples, 1)) {
S4Vectors::metadata(inSCE)$sctk$runBarcodeRankDrops$all_cells <-
list(metaOutput = metaOutput, argsList = argsList)
}
colData(inSCE) <- cbind(colData(inSCE), output)
return(inSCE)
}