-
Notifications
You must be signed in to change notification settings - Fork 3
/
getting_A.R
83 lines (81 loc) · 2.98 KB
/
getting_A.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#' Convert a tax table to a phylo object
#'
#' This is mostly the function \code{\link[ape]{as.phylo.formula}} from the ape package
#' but modified so that interior nodes get labeled.
#' Note: assumes that there are NOT single quotes around the labels.
#' @inheritParams ape::as.phylo.formula
#' @author Julien Dutheil \email{dutheil@@evolbio.mpg.de} and Eric Marcon, modified by Jacob Bien
#' @export
tax_table_to_phylo <- function (x, data = parent.frame(), collapse = TRUE, ...) {
err <- "Formula must be of the kind ~A1/A2/.../An."
if (any(lapply(data, class) != "factor"))
stop("Every column of data must be a factor.")
if (length(x) != 2)
stop(err)
if (x[[1]] != "~")
stop(err)
f <- x[[2]]
taxo <- list()
while (length(f) == 3) {
if (f[[1]] != "/")
stop(err)
f3.txt <- deparse(f[[3]])
if (!is.factor(data[[f3.txt]]))
stop(paste("Variable", f3.txt, "must be a factor"))
taxo[[f3.txt]] <- data[[f3.txt]]
if (length(f) > 1)
f <- f[[2]]
}
f.txt <- deparse(f)
if (!is.factor(data[[f.txt]]))
stop(paste("Variable", f.txt, "must be a factor."))
taxo[[f.txt]] <- data[[f.txt]]
taxo.data <- as.data.frame(taxo)
leaves.names <- as.character(taxo.data[, 1])
taxo.data[, 1] <- 1:nrow(taxo.data)
f.rec <- function(subtaxo) {
u <- ncol(subtaxo)
levels <- unique(subtaxo[, u])
if (u == 1) {
if (length(levels) != nrow(subtaxo))
warning("leaves names are not unique.")
return(as.character(subtaxo[, 1]))
}
t <- character(length(levels))
for (l in 1:length(levels)) {
x <- f.rec(subtaxo[subtaxo[, u] == levels[l], ][1:(u-1)])
t[l] <- paste0("(", paste(x, collapse = ","), ")", "'", levels[l], "'")
}
t
}
string <- paste0(f.rec(taxo.data), ";")
#string <- paste0("(", paste(f.rec(taxo.data), collapse = ","), ");")
phy <- ape::read.tree(text = string)
if (collapse)
phy <- ape::collapse.singles(phy)
phy$tip.label <- leaves.names[as.numeric(phy$tip.label)]
phy
}
#' Convert from phylo to the A matrix used in trac. Note this is similar to
#' the A used in rare, but with the column of all ones (for the root) removed
#' @param phy \code{phylo} object from the \code{ape} package
#' @export
phylo_to_A <- function(phy) {
nleaves <- length(phy$tip.label)
g <- igraph::graph_from_edgelist(phy$edge,
directed = TRUE)
igraph::V(g)$name <- c(phy$tip.label, phy$node.label)
num_nodes <- length(igraph::V(g))
A <- cbind(Matrix::Diagonal(nleaves),
Matrix::Matrix(0, nleaves, num_nodes - nleaves))
for (i in seq(nleaves + 1, num_nodes)) {
# for all nonleaf nodes
leaves_of_i <- intersect(igraph::subcomponent(g, mode = "out", v = i),
1:nleaves)
A[leaves_of_i, i] <- 1
}
A[, seq(nleaves + 1, num_nodes)] <- A[, rev(seq(nleaves + 1, num_nodes))]
rownames(A) <- phy$tip.label
colnames(A) <- c(phy$tip.label, rev(phy$node.label))
A[, -ncol(A)] # remove the root, which is the rightmost column
}