/
ms.R
40 lines (39 loc) 路 1.46 KB
/
ms.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#' Sprinkle mutations on genealogy
#'
#' @details
#' `make_sample` creates a genotype matrix from a given genealogy tree.
#' @param graph igraph
#' @param nsam number of cells to sample
#' @param mu mutation rate per cell division (ignored if segsites is given)
#' @param segsites number of segregating sites
#' @rdname ms
#' @export
make_sample = function(graph, nsam = 0L, mu = NULL, segsites = NULL) {
nodes = igraphlite::Vnames(graph)[igraphlite::is_sink(graph)]
if (nsam > 0L) {
nodes = sample(nodes, nsam, replace = FALSE)
graph = subtree(graph, nodes)
}
mutations = mutate_clades(graph, mu = mu, segsites = segsites)
origins = purrr::map(mutations$carriers, \(x) as.integer(nodes %in% x))
m = t(do.call(rbind, origins) * mutations$number)
rownames(m) = nodes
colnames(m) = mutations$origin
m
}
mutate_clades = function(graph, mu = NULL, segsites = NULL) {
nodes = igraphlite::Vnames(graph)[!igraphlite::is_source(graph)]
# TODO: remove low-freq variants?
number = if (is.null(mu)) {
if (is.null(segsites)) stop("specify either mu or segsites")
stats::rmultinom(1L, segsites, rep(1, length(nodes))) |> as.vector()
} else if (mu > 0) {
if (!is.null(segsites)) stop("segsites is ignored if mu is given")
stats::rpois(length(nodes), mu)
} else {
rep(1L, length(nodes))
}
tibble::tibble(origin = nodes, number = number) |>
dplyr::filter(.data$number > 0) |>
dplyr::mutate(carriers = paths_to_sink(graph, .data$origin))
}