Skip to content

Commit

Permalink
Merge 96125fa into ef6ca0c
Browse files Browse the repository at this point in the history
  • Loading branch information
jingchengx committed Oct 12, 2020
2 parents ef6ca0c + 96125fa commit 7b1ffa5
Show file tree
Hide file tree
Showing 5 changed files with 288 additions and 4 deletions.
2 changes: 2 additions & 0 deletions docs/src/lib/public.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ printEdges
printNodes
summarizeDataCF
directEdges!
checkroot!
preorder!
cladewiseorder!
rootatnode!
Expand All @@ -46,6 +47,7 @@ getNodeAges
pairwiseTaxonDistanceMatrix
biconnectedComponents
blobDecomposition
treeedgecomponents
getlabels
nparams
nni!
Expand Down
4 changes: 3 additions & 1 deletion src/PhyloNetworks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ module PhyloNetworks
biconnectedComponents,
blobDecomposition!,
blobDecomposition,
checkroot!,
treeedgecomponents,
## Network Bootstrap
treeEdgesBootstrap,
hybridDetection,
Expand Down Expand Up @@ -180,7 +182,7 @@ module PhyloNetworks
include("pairwiseDistanceLS.jl")
include("interop.jl")
include("substitutionModels.jl")
include("biconnectedComponents.jl")
include("graph_components.jl")
include("traitsLikDiscrete.jl")
include("deprecated.jl")
include("nj.jl")
Expand Down
196 changes: 194 additions & 2 deletions src/biconnectedComponents.jl → src/graph_components.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ function biconnectedComponents(node, index, S, blobs, ignoreTrivial)
#print(" case 1, low=$(node.inCycle) for node $(node.number); ")
#println("low=$(w.inCycle) for w $(w.number)")
# if node is an articulation point: pop until e
if (node.prev == nothing && children > 1) ||
(node.prev != nothing && w.inCycle >= node.k)
if (node.prev === nothing && children > 1) ||
(node.prev !== nothing && w.inCycle >= node.k)
# node is either root or an articulation point
# start a new strongly connected component
bb = Edge[]
Expand Down Expand Up @@ -224,3 +224,195 @@ function blobDecomposition!(net)
end
return blobR
end

"""
treeedgecomponents(net::HybridNetwork)
Return the tree-edge components of the semidirected network as a `membership`
dictionary `Node => Int`. Nodes with the same membership integer
value are in the same tree-edge component.
The tree-edge components of a network are the connected components of
the network when all hybrid edges are removed.
A `RootMismatch` error is thrown if there exists a cycle in any of the tree-edge
components, or if a tree-edge component has more than one "entry" hybrid node.
Warnings:
- since `Node`s are mutable, the network should not be modified
until usage of the output `membership` dictionary is over.
- the component IDs are not predicable, but will be consecutive integers
from 1 to the number of components.
"""
function treeedgecomponents(net::HybridNetwork)
# partition nodes into tree-edge components (TECs)
nodes = net.node
n = length(nodes)
unvisited = Set(nodes)
dfs_stack = Vector{Node}() # stack for iterative depth-first search over tree edges
dfs_parent = Dict{Node, Node}() # dfs_parent[node] = node's parent in DFS tree
# membership[node] = id of undirected component that the node belongs to
membership = Dict{Node, Int}()
cur_id = 0 # undirected component id

while !isempty(unvisited)
# loop over undirected components
# start with an unvisited node, DFS with undirected edges
cur_id += 1
node = pop!(unvisited) # unpredictable ordering: because unvisited is a set
push!(dfs_stack, node)
curnode = node
dfs_parent[curnode] = curnode
entrynode = nothing
while !isempty(dfs_stack)
# DFS loop over one tree-edge component
curnode = pop!(dfs_stack)
delete!(unvisited, curnode)
membership[curnode] = cur_id
for e in curnode.edge
if !e.hybrid
# for tree edge, do DFS, check for undirected cycles
nextnode = getOtherNode(e, curnode)
# if run into visited node (other than parent), then component has cycle
if nextnode !== dfs_parent[curnode]
if !in(nextnode, unvisited)
throw(RootMismatch(
"Undirected cycle exists, starting at node number $(nextnode.number)"))
else
dfs_parent[nextnode] = curnode
push!(dfs_stack, nextnode)
end
end
else # for hybrid edge, check there is at most one entry node into the TEC
if curnode === getChild(e)
if isnothing(entrynode)
entrynode = curnode
elseif entrynode !== curnode
throw(RootMismatch(
"""Multiple entry nodes for tree-edge component, numbered:
$(entrynode.number) and $(curnode.number)"""))
end
end
end
end
end
end

return membership
end


"""
checkroot!(net)
checkroot!(net::HybridNetwork, membership::Dict{Node, Int})
Set the root of `net` to an appropriate node and update the edges `containRoot`
field appropriately, using the `membership` output by [`treeedgecomponents`](@ref).
A node is appropriate to serve as root if it belongs in the
root tree-edge component, that is, the root of the tree-edge component graph.
- If the current root is appropriate, it is left as is. The direction of
edges (via `isChild1`) is also left as is, assuming it was in synch with
the existing root.
- Otherwise, the root is set to the first appropriate node in `net.node`,
that is not a leaf. Then edges are directed away from this root.
A `RootMismatch` error is thrown if `net` is not a valid semidirected
phylogenetic network (i.e. it is not possible to root the network in a way
compatible with the given hybrid edges).
Output: the `membership` ID of the root component.
The full set of nodes in the root component can be obtained as shown below.
Warning: only use the output component ID after calling the second version
`checkroot!(net, membership)`.
```jldoctest
julia> net = readTopology("(#H1:::0.1,#H2:::0.2,(((b)#H1)#H2,a));");
julia> membership = treeedgecomponents(net);
julia> rootcompID = checkroot!(net, membership);
julia> rootcomp = keys(filter(p -> p.second == rootcompID, membership));
julia> sort([n.number for n in rootcomp]) # number of nodes in the root component
3-element Array{Int64,1}:
-3
-2
4
```
"""
function checkroot!(net::HybridNetwork)
membership = treeedgecomponents(net) # dict Node => Int
return checkroot!(net, membership)
end
function checkroot!(net::HybridNetwork, membership::Dict{Node,Int})
# do not modify nodes or edges until we are done with membership
nodes = net.node
ncomp = maximum(values(membership)) # TECs are numbered 1,2,...,ncomp
#= 1. construct TEC graph: directed graph in which
vertices = TECs of original network, and
edge Ci --> Cj for each original hybrid edge: node in Ci --> node in Cj.
=#
tecG = [Set{Int}() for comp in 1:ncomp] # tecG[i] = set of children of ith TEC
noparent = trues(ncomp) # will stay true for TECs with no parent
for e in net.edge
e.hybrid || continue # skip tree edges
up, down = e.isChild1 ? (e.node[2], e.node[1]) : (e.node[1], e.node[2])
uc_up, uc_down = membership[up], membership[down]
noparent[uc_down] = false
push!(tecG[uc_up], uc_down)
end
# 2. check that the TEC graph has a single root
tec_root = findfirst(noparent)
if isnothing(tec_root) # all TECs have a parent: noparent are all false
throw(RootMismatch(
"Semidirected cycle exists: no component has in-degree 0"))
elseif sum(noparent) > 1
r1 = findfirst(noparent)
r2 = findlast(noparent)
nr1 = findfirst(n -> membership[n] == r1, nodes)
nr2 = findfirst(n -> membership[n] == r2, nodes)
throw(RootMismatch("nodes number $(nodes[nr1].number) and $(nodes[nr2].number) have no common ancestor"))
end

# 3. topological sort: check the TEC graph has no cycles, that is, is a DAG
indeg = zeros(Int, ncomp) # in-degree of vertex in TEC graph
for uc in tecG
for i in uc
indeg[i] += 1
end
end
headstack = [tec_root] # stack of TEC vertices of degree 0 in topological sort
while !isempty(headstack)
uc = pop!(headstack)
indeg[uc] -= 1
for i in tecG[uc]
indeg[i] -= 1
if indeg[i] == 0
push!(headstack, i)
end
end
end
cyclehead = findfirst(indeg .!= -1)
isnothing(cyclehead) || throw(RootMismatch(
"""Semidirected cycle exists, starting at TEC containing node number $(nodes[findfirst(n -> membership[n] == cyclehead, nodes)].number)"""))

# 4. original network: reset the network root if needed,
# and update the edges' containRoot accordingly.
# NOT done: build the root component to return it. Instead: return tec_root
# Set(node for node = nodes if membership[node] == tec_root)
# Set(node for (node,comp) in membership if comp == tec_root)
#rootcomp = keys(filter(p -> p.second == tec_root, membership))
curroot = nodes[net.root]
if membership[curroot] == tec_root
# update containRoot only: true for edges in or out of the root TEC
for e in net.edge
e.containRoot = (membership[getParent(e)] == tec_root)
end
else
net.root = findfirst(n -> (!n.leaf && membership[n] == tec_root), nodes)
directEdges!(net) # also updates containRoot of all edges
end
return tec_root # return rootcomp
end

3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ tests = [
"test_calculateExpCF.jl", "test_calculateExpCF2.jl",
"test_hasEdge.jl", "test_parameters.jl", "test_correctLik.jl",
"test_partition.jl", "test_partition2.jl", "test_deleteHybridizationUpdate.jl", "test_add2hyb.jl", "test_optBLparts.jl", "test_undirectedOtherNetworks.jl",
"test_graph_components.jl",
"test_manipulateNet.jl", "test_compareNetworks.jl",
"test_badDiamII.jl",
"test_multipleAlleles.jl",
Expand All @@ -93,7 +94,7 @@ tests = [
"test_traitLikDiscrete.jl",
"test_phyLiNCoptimization.jl",
"test_readInputData.jl",
"test_nj.jl"
"test_nj.jl",
]

@show PhyloNetworks.CHECKNET
Expand Down
87 changes: 87 additions & 0 deletions test/test_graph_components.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# using Test, PhyloNetworks
# using PhyloPlots
# using Debugger

@testset "tree component" begin

treestr = "(A:3.0,(B:2.0,(C:1.0,D:1.0):1.0):1.0);"
tree = readTopology(treestr)
for e in tree.edge e.containRoot=false; end # wrong, on purpose
@test collect(values(treeedgecomponents(tree))) == repeat([1], inner=7)
rcomp = checkroot!(tree)
@test rcomp == 1
@test all([e.containRoot for e = tree.edge])

netstr = "(#H1:::0.1,#H2:::0.2,(((b)#H1)#H2,a));"
net = readTopology(netstr)
for e in net.edge e.containRoot=false; end # wrong, on purpose
node2comp = treeedgecomponents(net) # e.g. [1,1,1,2,2,3] or [2,2,2,1,1,3]
compsize = [count(isequal(i), values(node2comp)) for i in 1:3]
@test sort(compsize) == [1,2,3]
rcompID = checkroot!(net, node2comp)
@test compsize[rcompID] == 3
# plot(net, :R, showNodeNumber=true, showEdgeNumber=true);
rcomp = keys(filter(p -> p.second == rcompID, node2comp))
@test Set(n.number for n in rcomp) == Set([-2 -3 4])
@test Set(e.number for e in net.edge if e.containRoot) == Set([7 6 5 2 1])

# test semidirected cycle case
net.edge[1].isChild1 = !net.edge[1].isChild1
net.edge[7].isChild1 = !net.edge[7].isChild1
net.edge[7].hybrid = true
net.edge[7].gamma = net.edge[4].gamma
net.edge[4].gamma = 1.0
net.edge[4].hybrid = false
net.root = 5
net.node[6].hybrid = true
net.node[6].name = "H3"
net.node[2].hybrid = false
net.hybrid[1] = net.node[6]
mem = treeedgecomponents(net)
nnum2comp = Dict(n.number => uc for (n,uc) in mem)
@test nnum2comp[4] == nnum2comp[-3]
@test nnum2comp[1] == nnum2comp[2] == nnum2comp[3]
@test length(unique(nnum2comp[nn] for nn in [4,1,-2])) == 3
@test_throws PhyloNetworks.RootMismatch checkroot!(net, mem)
try checkroot!(net, mem)
catch e
@test occursin("Semidirected cycle", sprint(showerror, e))
end

# test multiple entry points case
str_level1 = "(((S8,S9),((((S1,S4),(S5)#H1),(#H1,(S6,S7))))#H2),(#H2,S10));"
netl1 = readTopology(str_level1)
P = PhyloNetworks # binding P: local to test set
root = netl1.node[19] # 19 = findfirst(n -> n.number == -2, netl1.node)
e1 = netl1.edge[20] # 20 = findfirst(e -> e.number == 20, netl1.edge)
e2 = netl1.edge[17] # 17 = findfirst(e -> e.number == 17, netl1.edge)
P.deleteEdge!(netl1, e1)
P.deleteEdge!(netl1, e2)
P.deleteNode!(netl1, root)
P.removeEdge!(netl1.node[18], e1) # 18 = P.getIndexNode(-12, netl1)
P.removeEdge!(netl1.node[16], e2) # 16 = P.getIndexNode(-3, netl1)
# nodenumber2UC = Dict(n.number => uc for (n,uc) in treeedgecomponents(netl1))
mem = treeedgecomponents(netl1)
@test_throws PhyloNetworks.RootMismatch checkroot!(netl1, mem)
try checkroot!(netl1, mem)
catch e
@test occursin("no common ancestor", sprint(showerror, e))
end

# test undirected cycle case
netl1 = readTopology(str_level1)
n1 = netl1.node[14] # 14 = P.getIndexNode(-6, netl1)
n2 = netl1.node[6] # 6 = P.getIndexNode(-8, netl1)
e = P.Edge(21,1.0,false,1.0) # 21 = length(netl1.edge) + 1
P.setNode!(e, n1)
P.setNode!(e, n2)
push!(n1.edge, e)
push!(n2.edge, e)
push!(net.edge, e)
@test_throws PhyloNetworks.RootMismatch treeedgecomponents(netl1)
try treeedgecomponents(netl1)
catch e
@test occursin("Undirected cycle", sprint(showerror, e))
end

end

0 comments on commit 7b1ffa5

Please sign in to comment.