/
average_counts_rtree.R
142 lines (121 loc) · 4.81 KB
/
average_counts_rtree.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#' rtree implementation of count_within_many_impl
#' @param csd Cell seg data for a single field.
#' @param name Name associated with `csd`, for example the basename of the
#' image file.
#' @param combos List of pairs of (from phenotype name, to phenotype name)
#' and tissue category.
#' @param radii Vector of radii.
#' @param phenotype_rules Named list of phenotype rules.
#' @seealso count_within_many_impl
#' @md
#' @keywords internal
count_within_many_impl_rtree = function(csd, name, combos, radii,
phenotype_rules) {
# We will call count_within_detail to add a count column for
# each 'to' phenotype. Figure out what they are.
to_phenotypes = combos %>% purrr::map_chr(list('pair', 2)) %>% unique()
to_phenotypes = phenotype_rules[to_phenotypes]
# Get the categories of interest.
categories = combos %>% purrr::map_chr('category') %>% unique()
# Subset to what we care about, for faster distance calculation
if (!anyNA(categories))
csd = csd %>% dplyr::filter(`Tissue Category` %in% categories)
# Compute individual counts for each cell by category.
counts = purrr::map_dfr(categories, function(category) {
if (!is.na(category))
d_cat = csd %>% dplyr::filter(`Tissue Category`==category)
else d_cat = csd
dplyr::bind_cols(d_cat,
count_within_detail(d_cat, to_phenotypes, radii))
})
# Now we can do the work, computing summary stats for all combos and radii
purrr::map_dfr(combos, function(combo) {
summarize_combo(counts, combo$category, phenotype_rules,
combo$pair[[1]], combo$pair[[2]], radii)
})
}
# Compute summary stats for a single dataset, a single category and
# (from, to) pair, and all radii
summarize_combo = function(d, category, phenotype_rules, from, to, radii) {
if (!is.na(category))
d_cat = d %>% dplyr::filter(`Tissue Category`==category)
else {
d_cat = d
category = 'all'
}
to_count = sum(phenoptr::select_rows(d_cat, phenotype_rules[[to]]))
# The rows of interest
rows = d_cat %>%
dplyr::filter(phenoptr::select_rows(., phenotype_rules[[from]]))
# Now we can summarize per radius
if (nrow(rows) > 0) {
purrr::map_dfr(radii, function(radius) {
count_col = count_col_name(to, radius)
counts = rows[[count_col]] # Single column of interest
tibble::tibble(category=category,
from=from, to=to, radius=radius,
from_count=nrow(rows), to_count=to_count,
from_with = sum(counts>0, na.rm=TRUE),
within_mean = mean(counts, na.rm=TRUE))
})
} else {
tibble::tibble(
category=category,
from=from, to=to, radius = radii,
from_count = 0L,
to_count = to_count,
from_with = 0L,
within_mean = NA
)
}
}
#' Compute count within for individual cells in a single field.
#'
#' Very fast version using `rtree::countWithinDistance()`.
#' @param csd Cell seg data.
#' @param phenotypes Optional list of phenotypes to include. If omitted,
#' will use `unique_phenotypes(csd)`. Counts are from each cell to each
#' phenotype.
#' @param radii The radius or radii to search within.
#' @export
#' @md
count_within_detail = function(csd, phenotypes=NULL, radii) {
stop_if_multiple_fields(csd)
if (!function_exists('rtree', 'countWithinDistance'))
stop('Please install the rtree package with the command\n',
' remotes::install_github(\'akoyabio/rtree\')')
phenotypes = validate_phenotypes(phenotypes, csd)
field_locs = csd %>%
dplyr::select(X=`Cell X Position`, Y=`Cell Y Position`) %>%
as.matrix()
result = purrr::imap(phenotypes, function(phenotype, name) {
# Which cells are in the target phenotype?
phenotype_cells = select_rows(csd, phenotype)
if (sum(phenotype_cells)>0) {
# Make an rtree of the phenotype cells
to_cells_locs = field_locs[phenotype_cells, , drop=FALSE]
to_cells_tree = rtree::RTree(to_cells_locs)
# Now compute count within for each radius
purrr::map(radii, function(radius) {
within = rtree::countWithinDistance(to_cells_tree,
field_locs, radius)
# Subtract one for cells of type `pheno`; we don't want to count self
within = within - phenotype_cells
col_name = count_col_name(name, radius)
list(within) %>% rlang::set_names(col_name)
}) %>% purrr::flatten()
}
else {
# No cells of the selected phenotype
count_col = list(rep(0L, nrow(csd)))
purrr::map(radii, function(radius) {
col_name = count_col_name(name, radius)
count_col %>% rlang::set_names(col_name)
}) %>% purrr::flatten()
}
})
tibble::as_tibble(purrr::flatten(result))
}
count_col_name = function(pheno_name, radius) {
paste0(pheno_name, ' within ', radius)
}