-
Notifications
You must be signed in to change notification settings - Fork 2
/
TreeConstraints.jl
192 lines (170 loc) · 7.16 KB
/
TreeConstraints.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""
    generate_constraints(;
        mono::Vector{Vector{S}}=Vector{String}[],
        not_mono::Vector{Vector{S}}=Vector{String}[],
        exc::Vector{Tuple{Vector{S},Vector{S}}}=Tuple{Vector{String},Vector{String}}[],
    )::ConstraintDict where {S<:AbstractString}

Generate a dictionary of constraints based on the given keyword arguments.

# Keywords
- `mono`: groups of leaves that have to form a monophyletic group.
- `not_mono`: groups of leaves that are not allowed to form a monophyletic group.
- `exc`: partial constraints given as tuples `(clade, excluded)`, where the leaves in
  `excluded` are not allowed to be part of the clade formed by the leaves in `clade`.

Trivial constraints are removed and a warning is emitted: a `mono` / `not_mono`
constraint needs at least 2 elements, an `exc` constraint needs at least 2 elements in
the first and at least 1 element in the second part of the tuple. Duplicated
constraints are stored only once. The vectors passed in are not modified.

# Returns
A `ConstraintDict` with the keys `:mono`, `:not_mono` and `:exc`.

# Examples
```julia
generate_constraints(mono=[["a", "b"], ["a", "c"]], exc=[(["b", "c"], ["d"])])
```
"""
function generate_constraints(;
    mono::Vector{Vector{S}} = Vector{String}[],
    not_mono::Vector{Vector{S}} = Vector{String}[],
    exc::Vector{Tuple{Vector{S},Vector{S}}} = Tuple{Vector{String},Vector{String}}[],
)::ConstraintDict where {S<:AbstractString}
    # Filter out trivial constraints. The non-mutating `filter` is used on purpose:
    # a function without a `!` suffix must not alter the vectors owned by the caller.
    valid_mono = filter(group -> length(group) > 1, mono)
    valid_not_mono = filter(group -> length(group) > 1, not_mono)
    valid_exc = filter(c -> length(c[1]) > 1 && length(c[2]) > 0, exc)

    # Inform the user if anything was dropped.
    if length(valid_mono) < length(mono) || length(valid_not_mono) < length(not_mono)
        @warn "Some trivial 'mono' / 'not_mono' type constraints were removed.
A valid 'mono' / 'not_mono' constraint needs at least 2 elements."
    end # if
    if length(valid_exc) < length(exc)
        @warn "Some trivial 'exc' type constraints were removed.
A non-trivial 'exc' constraints needs at least 2 elements in the first, and at least 1 in the second part of the tuple"
    end # if

    constraints_dict = ConstraintDict()
    constraints_dict[:mono] = unique(valid_mono)
    constraints_dict[:not_mono] = unique(valid_not_mono)
    constraints_dict[:exc] = unique(valid_exc)
    return constraints_dict
end # generate_constraints
"""
    generate_constraints!(
        constraints::ConstraintDict;
        mono::Vector{Vector{S}}=Vector{String}[],
        not_mono::Vector{Vector{S}}=Vector{String}[],
        exc::Vector{Tuple{Vector{S},Vector{S}}}=Tuple{Vector{String},Vector{String}}[],
    )::ConstraintDict where {S<:AbstractString}

Add further constraints to the existing dictionary `constraints` and return it.

The keyword arguments are processed by the keyword version of `generate_constraints`
(see there for their meaning). The resulting entries are merged into `constraints`
category by category, without storing duplicates. A category that is not yet present in
`constraints` (e.g. when `constraints` is empty) is created.
"""
function generate_constraints!(
    constraints::ConstraintDict;
    mono::Vector{Vector{S}} = Vector{String}[],
    not_mono::Vector{Vector{S}} = Vector{String}[],
    exc::Vector{Tuple{Vector{S},Vector{S}}} = Tuple{Vector{String},Vector{String}}[],
)::ConstraintDict where {S<:AbstractString}
    additions = generate_constraints(; mono = mono, not_mono = not_mono, exc = exc)
    # Loop over the categories of the *new* constraints rather than over the keys of
    # `constraints`: otherwise additions to a category that is missing in `constraints`
    # would be dropped silently, and keys unknown to `additions` would raise a KeyError.
    for key in keys(additions)
        if haskey(constraints, key)
            # `union` keeps the entries of each category unique
            constraints[key] = union(constraints[key], additions[key])
        else
            constraints[key] = additions[key]
        end # if/else
    end # for
    return constraints
end # generate_constraints!
"""
    generate_constraints(filename::String)

Read the constraints stored in the text file `filename` and turn them into a dictionary
of constraints. The file has to follow a specific format, i.e.:

    # lines starting with '#' will be ignored
    # multiple constraints per line allowed
    mono: A,B; C,D;
    mono: E,F;
    # final semicolon is optional
    not_mono: C,D,E
    exc : E,F!G;

The parsed constraints are passed on to the keyword version of `generate_constraints`;
see there for more info.
"""
function generate_constraints(filename::String)
    parsed = parse_constraints(filename)
    return generate_constraints(;
        mono = parsed[1],
        not_mono = parsed[2],
        exc = parsed[3],
    )
end # generate_constraints
"""
    generate_constraints!(constraints::ConstraintDict, filename::String)

Read the constraints stored in the text file `filename` and add them to the existing
dictionary `constraints`. The file is read with `parse_constraints` and has to follow
the format described for `generate_constraints(filename)`; the parsed constraints are
passed on to the keyword version of `generate_constraints!`.
"""
function generate_constraints!(constraints::ConstraintDict, filename::String)
    parsed = parse_constraints(filename)
    return generate_constraints!(
        constraints;
        mono = parsed[1],
        not_mono = parsed[2],
        exc = parsed[3],
    )
end # generate_constraints!
"""
    parse_constraints(filename::S)::Tuple{
        Vector{Vector{S}},
        Vector{Vector{S}},
        Vector{Tuple{Vector{S},Vector{S}}},
    } where {S<:AbstractString}

--- INTERNAL ---
Helper function that reads constraints out of a file.

Every line is stripped of all whitespace first. Blank lines and lines starting with
`#` are skipped. Each remaining line has the form `type:constraint;constraint;...`,
where `type` is one of `mono`, `not_mono` or `exc` and the trailing semicolon is
optional. The leaves of a constraint are separated by `,`; in an `exc` constraint the
clade and the excluded leaves are separated by `!`. Lines with any other type are
skipped with a warning.

# Returns
A tuple `(mono, not_mono, exc)` holding the parsed constraints of each type.

# Throws
- `FileSyntaxError`: if a non-comment line does not contain exactly one colon, or if an
  `exc` constraint does not contain exactly one exclamation mark.
"""
function parse_constraints(
    filename::S,
)::Tuple{
    Vector{Vector{S}},
    Vector{Vector{S}},
    Vector{Tuple{Vector{S},Vector{S}}},
} where {S<:AbstractString}
    # Concretely typed accumulators; the substrings produced by `split` are converted
    # to `String` by `push!`.
    mono = Vector{String}[]
    not_mono = Vector{String}[]
    exc = Tuple{Vector{String},Vector{String}}[]
    for raw_line in eachline(filename)
        # remove whitespace
        line = filter(!isspace, raw_line)
        # skip blank lines and comment lines
        (isempty(line) || startswith(line, "#")) && continue
        parts = split(line, ":")
        length(parts) == 2 || throw(
            FileSyntaxError("There should be exactly one colon in each non-comment line."),
        )
        kind, body = parts
        if kind == "mono" || kind == "not_mono"
            target = kind == "mono" ? mono : not_mono
            for constraint in split(body, ";")
                # makes sure that parsing works with & without trailing semicolon
                isempty(constraint) && continue
                push!(target, split(constraint, ","))
            end # for
        elseif kind == "exc"
            for constraint in split(body, ";")
                isempty(constraint) && continue
                sides = split(constraint, "!")
                length(sides) == 2 || throw(
                    FileSyntaxError(
                        "There should be exactly one exclamation mark in each 'exc' type constraint.",
                    ),
                )
                push!(exc, (split(sides[1], ","), split(sides[2], ",")))
            end # for
        else
            # `$(kind)` (with parentheses) interpolates only the offending type name
            @warn "Skipped line with unsupported constraint type '$(kind)'.
Allowed types are 'mono', 'not_mono' and 'exc'"
        end # if/else
    end # for
    return (mono, not_mono, exc)
end # parse_constraints
"""
    topological(constraints::ConstraintDict, tree::GeneralNode)::Bool

Check whether `tree` fulfills all topological `constraints`.

An empty `constraints` dictionary is always fulfilled. Otherwise the entries stored
under `:mono`, `:not_mono` and `:exc` are checked in this order, using the lowest
common ancestor (as returned by `find_lca`) of the respective leaves:

- `:mono` is violated if the ancestor is the root, or if any leaf below the ancestor is
  not named in the constraint.
- `:not_mono` is violated if the number of leaves below the ancestor equals the number
  of leaves named in the constraint.
- `:exc` is violated if the ancestor of the first part of the tuple is the root, or if
  the `binary` field of any leaf named in the second part starts with the `binary`
  field of the ancestor.

# Returns
`true` if no constraint is violated, `false` otherwise.
"""
function topological(constraints::ConstraintDict, tree::GeneralNode)::Bool
    isempty(constraints) && return true
    for leaves in constraints[:mono]
        lca = find_lca(tree, leaves)
        lca.root && return false
        all(child -> child.name in leaves, get_leaves(lca)) || return false
    end # for
    for leaves in constraints[:not_mono]
        lca = find_lca(tree, leaves)
        length(get_leaves(lca)) == length(leaves) && return false
    end # for
    for (clade, excluded) in constraints[:exc]
        lca = find_lca(tree, clade)
        lca.root && return false
        prefix_length = length(lca.binary)
        for name in excluded
            node = find_name(tree, name)
            # NOTE(review): presumably `binary` encodes the position of a node in the
            # tree, so a shared prefix means `node` lies below `lca` - confirm.
            # The length guard avoids a BoundsError when `node.binary` is shorter than
            # `lca.binary`; such a node cannot have `lca.binary` as a prefix.
            if length(node.binary) >= prefix_length &&
               lca.binary == node.binary[1:prefix_length]
                return false
            end # if
        end # for
    end # for
    return true
end # topological