-
Notifications
You must be signed in to change notification settings - Fork 14
/
test_pseudobulk.R
106 lines (90 loc) · 4.27 KB
/
test_pseudobulk.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
test_pseudobulk <- function(data, design,
aggregate_cells_by,
contrast,
reduced_design = NULL,
ridge_penalty = 0,
subset_to = NULL, col_data = NULL, reference_level = NULL,
pval_adjust_method = "BH", sort_by = NULL,
decreasing = FALSE, n_max = Inf,
verbose = FALSE){
aggregate_cells_by_capture <- substitute(aggregate_cells_by)
subset_to_capture <- substitute(subset_to)
sort_by_capture <- substitute(sort_by)
test_pseudobulk_q(data, design = design,
aggregate_cells_by = aggregate_cells_by_capture,
contrast = {{contrast}},
reduced_design = reduced_design,
ridge_penalty = ridge_penalty,
subset_to = subset_to_capture,
col_data = col_data,
reference_level = reference_level,
pval_adjust_method = pval_adjust_method,
sort_by = sort_by_capture,
decreasing = decreasing, n_max = n_max,
verbose = verbose,
env = parent.frame())
}
test_pseudobulk_q <- function(data, design,
aggregate_cells_by,
contrast,
reduced_design = NULL,
ridge_penalty = 0,
subset_to = NULL, col_data = NULL, reference_level = NULL,
pval_adjust_method = "BH", sort_by = NULL,
decreasing = FALSE, n_max = Inf,
verbose = FALSE,
env = parent.frame()){
.Deprecated(paste0("Please use the 'pseudobulk' function instead. 'pseudobulk' produces a ",
"summarized 'SingleCellExperiment' object which is passed to `glm_gp()`."))
# Make sure 'subset_to' is valid
if(is.null(subset_to)){
subset_to <- TRUE
}
# Validate `data`
if(is.vector(data)){
data <- matrix(data, nrow = 1)
}
data_mat <- handle_data_parameter(data, use_assay = NULL, on_disk = NULL)
# Convert the formula to a model_matrix
col_data <- get_col_data(data, col_data)
des <- handle_design_parameter(design, data, col_data, reference_level)
# Split the data according to the `aggregate_cells_by` column
index_seq <- seq_len(ncol(data))
names(index_seq) <- colnames(data)
aggregate_cells_by_e <- eval_with_q(aggregate_cells_by, col_data, env = env)
if(length(aggregate_cells_by_e) == length(ncol(data)) ){
stop("'aggregate_cells_by' must be exactly as long as the number of columns in data")
}
names(aggregate_cells_by_e) <- colnames(data)
subset_to_e <- eval_with_q(subset_to, col_data, env = env)
pseudo_bulk_split <- split(index_seq[subset_to_e], aggregate_cells_by_e[subset_to_e])
# Aggregate the model matrix
new_model_matrix <- do.call(rbind, lapply(pseudo_bulk_split, function(idx){
DelayedMatrixStats::colMeans2(des$model_matrix, rows = idx)
}))
colnames(new_model_matrix) <- colnames(des$model_matrix)
# Aggregate the count matrix
new_data_mat <- do.call(cbind, lapply(pseudo_bulk_split, function(idx){
DelayedMatrixStats::rowSums2(data_mat, cols = idx)
}))
rownames(new_data_mat) <- rownames(data_mat)
# Aggregate reduced design if not NULL
if(! is.null(reduced_design)) {
reduced_mat <- handle_design_parameter(reduced_design, data, col_data, reference_level)$model_matrix
new_reduced_mat <- do.call(rbind, lapply(pseudo_bulk_split, function(idx){
DelayedMatrixStats::colMeans2(reduced_mat, rows = idx)
}))
colnames(new_reduced_mat) <- colnames(reduced_mat)
reduced_design <- new_reduced_mat
}
fit <- glm_gp(new_data_mat,
design = new_model_matrix,
ridge_penalty = ridge_penalty,
verbose = verbose)
test_de_q(fit, contrast = {{contrast}}, reduced_design = reduced_design,
subset_to = NULL, pseudobulk_by = NULL,
pval_adjust_method = pval_adjust_method, sort_by = sort_by,
decreasing = decreasing, n_max = n_max,
verbose = verbose,
env = env)
}