-
Notifications
You must be signed in to change notification settings - Fork 0
/
benchmarking.R
164 lines (138 loc) · 5.58 KB
/
benchmarking.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#' Compute the batch separation metric
#'
#' @param emb the embedding of the cells where each row is a cell, and each column is a feature (e.g. principal component)
#' @param batch the batch labels of the cells
#' @return Cohen's kappa of the the predicted batch labels with the true batch label on the entire dataset (i.e., training accuracy)
#' @examples
#' # suppose that iris[["Species"]] are the batch labels
#' batch_separation(emb = iris[,1:2],batch = iris$Species) # lower batch separation present in dim = 1:2
#' plot(iris[,1],iris[,2],col = iris[["Species"]])
#'
#' batch_separation(emb = iris[,3:4],batch = iris$Species) # higher batch separation in dim = 3:4
#' plot(iris[,3],iris[,4],col = iris[["Species"]])
#' @export
batch_separation <- function(emb,batch){
dat <- data.frame(emb, batch)
model <- e1071::svm(batch ~ ., data = dat)
return(caret::confusionMatrix(model$fitted, dat$batch)$overall[["Kappa"]])
}
#' Wrapper for \code{scmap::scmapCluster}
#' @param query the query dataset (SingleCellExperiment object)
#' @param ref the reference dataset (SingleCellExperiment object)
#' @param gene_set a list of characters representing the genes
#' @param threshold the threshold parameter used in scmapCluster, between 0 and 1
#' @param ... additional parameters passed on to \code{caret::confusionMatrix}. For example \code{dnn = c("Prediction","Truth")}.
#' @return confusion matrix of the true labels in `ref` and the `scmapCluster` predicted labels
#' @export
run_scmapCluster <- function(query, ref, gene_set, param, ...){
# scmap requires the "feature_symbol" slot to be filled
rowData(query)$feature_symbol <- rownames(query)
rowData(ref)$feature_symbol <- rownames(ref)
# Specify the gene set used for scmapCluster
rowData(ref)$scmap_features <-
rowData(ref)$feature_symbol %in% gene_set
ref <- scmap::indexCluster(ref)
scmapCluster_results <- scmap::scmapCluster(
projection = query,
index_list = list(Reference = metadata(ref)$scmap_cluster_index),
threshold = param
)
true_labels <- colData(query)$cell_type1
pred_labels <-
factor(scmapCluster_results$scmap_cluster_labs[, "Reference"])
# make the factor levels uniform
shared_levels <- union(levels(true_labels),
levels(pred_labels))
true_labels <- forcats::fct_expand(true_labels, shared_levels)
pred_labels <- forcats::fct_expand(pred_labels, shared_levels)
caret::confusionMatrix(data = pred_labels,
reference = true_labels,
...)
}
#' Wrapper for \code{scmap::scmapCell}
#' @param query the query dataset (SingleCellExperiment object)
#' @param ref the reference dataset (SingleCellExperiment object)
#' @param gene_set a list of characters representing the genes
#' @param param the number of nearest neighbors in scmapCell, an integer
#' @param ... additional parameters passed on to \code{caret::confusionMatrix}. For example \code{dnn = c("Prediction","Truth")}.
#' @return confusion matrix of the true labels in `ref` and the `scmapCluster` predicted labels
#' @export
run_scmapCell <- function(query, ref, gene_set, param, ...) {
# scmap requires the "feature_symbol" slot to be filled
rowData(query)$feature_symbol <- rownames(query)
rowData(ref)$feature_symbol <- rownames(ref)
# Specify the gene set used for scmapCluster
rowData(ref)$scmap_features <-
rowData(ref)$feature_symbol %in% gene_set
ref <- indexCell(ref)
scmapCell_results <- scmapCell(
projection = query,
index_list = list(Reference = metadata(ref)$scmap_cell_index),
w = param)
scmapCell_clusters <- scmapCell2Cluster(scmapCell_results,
list(as.character(colData(ref)$cell_type1)))
true_labels <- colData(query)$cell_type1
pred_labels <-
factor(scmapCell_clusters$scmap_cluster_labs[, "Reference"])
# make the factor levels uniform
shared_levels <- union(levels(true_labels),
levels(pred_labels))
true_labels <- forcats::fct_expand(true_labels, shared_levels)
pred_labels <- forcats::fct_expand(pred_labels, shared_levels)
caret::confusionMatrix(data = pred_labels,
reference = true_labels,
...)
}
#' Run scmap over a range of parameters, over multiple gene sets
#' @param query a SingleCellExperiment object for the query dataset
#' @param reference a SingleCellExperiment object for the reference dataset
#' @param gene_sets a named list of gene sets
#' @param method_use a string denoting whether to use scmapCluster or scmapCell
#' @export
run_mapping_accuracy_comparison <-
function(query,
reference,
gene_sets,
method_use = "scmapCluster") {
f <- NULL
if(method_use == "scmapCluster"){
params <- 0.1*(1:9)
f <- corgi::run_scmapCluster
}
if(method_use == "scmapCell"){
params <- 5*(1:6)
f <- corgi::run_scmapCell
}
lapply(
X = params,
FUN = function(param) {
gene_sets %>%
lapply(
FUN = function(gene_set) {
f(
query = query,
ref = reference,
gene_set = gene_set,
param = param
)
}
) %>%
lapply(
FUN = function(confusionMat) {
confusionMat$overall
}
) %>%
Reduce(f = rbind) %>%
data.frame ->
results
results$Gene_set <- names(gene_sets)
results$Param <- param
rownames(results) <- NULL
results
}) %>%
Reduce(f = rbind) -> results
results$Gene_set <-
factor(results$Gene_set,
levels = names(gene_sets))
return(results)
}