Skip to content

Commit

Permalink
Merge 750ea0c into 1a97930
Browse files Browse the repository at this point in the history
  • Loading branch information
cecileane committed May 21, 2020
2 parents 1a97930 + 750ea0c commit ea7ba4a
Show file tree
Hide file tree
Showing 9 changed files with 485 additions and 202 deletions.
2 changes: 1 addition & 1 deletion src/PhyloNetworks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module PhyloNetworks
# stdlib (standard libraries)
using Dates
using Distributed
using LinearAlgebra # for LowerTriangular, logdet, diag
using LinearAlgebra # for LowerTriangular, logdet, diag, mul!
using Printf: @printf, @sprintf
using Random
using Statistics: mean, quantile, median
Expand Down
9 changes: 5 additions & 4 deletions src/auxiliary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -409,10 +409,11 @@ function isconnected(node1, node2)
end

# function to check in an edge is in an array by comparing
# the edges numbers (uses isEqual)
# edge numbers (could use isEqual for adding comparisons of gammaz and inCycle)
# needed for updateHasEdge
function isEdgeNumIn(edge::Edge,array::Array{Edge,1})
return all((e->!isEqual(edge,e)), array) ? false : true
enum = edge.number
return any(e -> e.number == enum, array)
end

# function to check in a leaf is in an array by comparing
Expand Down Expand Up @@ -1695,8 +1696,8 @@ Warning: assumes that there aren't "duplicated" edges, that is, no 2-cycles.
function adjacentedges(centeredge::Edge)
n = centeredge.node
length(n) == 2 || error("center edge is connected to $(length(n)) nodes")
edges = copy(n[1].edge) # shallow copy, to avoid modifying the first node
for ei in n[2].edge
@inbounds edges = copy(n[1].edge) # shallow copy, to avoid modifying the first node
@inbounds for ei in n[2].edge
ei === centeredge && continue # don't add the center edge again
# a second edge between nodes n[1] and n[2] would appear twice
push!(edges, ei)
Expand Down
12 changes: 6 additions & 6 deletions src/parsimony.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Assumes a *tree* (no reticulation) and correct isChild1 attribute.
output: dictionary with state sets and most parsimonious score
"""
function parsimonyBottomUpFitch!(node::Node, possibleStates::Dict{Int64,Set{T}}, parsimonyscore::Array{Int64,1}) where {T}
function parsimonyBottomUpFitch!(node::Node, possibleStates::Dict{Int,Set{T}}, parsimonyscore::Array{Int,1}) where {T}
node.leaf && return # change nothing if leaf
childrenStates = Set{T}[] # state sets for the 2 (or more) children
for e in node.edge
Expand Down Expand Up @@ -48,7 +48,7 @@ the state of the root. Assumes a *tree*: no reticulation.
output: dictionary with state sets
"""
function parsimonyTopDownFitch!(node::Node, possibleStates::Dict{Int64,Set{T}}) where {T}
function parsimonyTopDownFitch!(node::Node, possibleStates::Dict{Int,Set{T}}) where {T}
for e in node.edge
child = e.node[e.isChild1 ? 1 : 2]
if child == node continue; end # exclude parent edges
Expand All @@ -67,7 +67,7 @@ end
summarize character states at nodes, assuming a *tree*
"""
function parsimonySummaryFitch(tree::HybridNetwork, nodestates::Dict{Int64,Set{T}}) where {T}
function parsimonySummaryFitch(tree::HybridNetwork, nodestates::Dict{Int,Set{T}}) where {T}
println("node number => character states on tree ",
writeTopology(tree,di=true,round=true,digits=1))
for n in tree.node
Expand Down Expand Up @@ -98,7 +98,7 @@ where the union is taken over displayed trees with the MP score.
function parsimonyDiscreteFitch(net::HybridNetwork, tips::Dict{String,T}) where {T}
# T = type of characters. Typically Int if data are binary 0-1
# initialize dictionary: node number -> admissible character states
possibleStates = Dict{Int64,Set{T}}()
possibleStates = Dict{Int,Set{T}}()
for l in net.leaf
if haskey(tips, l.name)
possibleStates[l.number] = Set(tips[l.name])
Expand All @@ -110,7 +110,7 @@ function parsimonyDiscreteFitch(net::HybridNetwork, tips::Dict{String,T}) where
directEdges!(net) # parsimonyBottomUpFitch! uses isChild1 attributes
trees = displayedTrees(net, 0.0) # all displayed trees
mpscore = Int[] # one score for each tree
statesets = Dict{Int64,Set{T}}[] # one state set dict per tree
statesets = Dict{Int,Set{T}}[] # one state set dict per tree
for tree in trees
statedict = deepcopy(possibleStates)
parsimonyscore = [0] # initialization, mutable
Expand Down Expand Up @@ -1263,7 +1263,7 @@ function maxParsimonyNet(currT::HybridNetwork, df::DataFrame;
end
end
tend = time_ns() # in nanoseconds
telapsed = round(convert(Int64, tend-tstart) * 1e-9, digits=2) # in seconds
telapsed = round(convert(Int, tend-tstart) * 1e-9, digits=2) # in seconds
writelog_1proc && close(errfile)
msg = "\n" * Dates.format(Dates.now(), "yyyy-mm-dd H:M:S.s")
if writelog
Expand Down
521 changes: 384 additions & 137 deletions src/phyLiNCoptimization.jl

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/readData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ function countquartetsintrees(tree::Vector{HybridNetwork},
end
function countquartetsintrees!(quartet::Vector{QuartetT{MVector{4,Float64}}},
tree::HybridNetwork, whichQ::Symbol, weight_byallele::Bool, nCk::Matrix,
taxonnumber::Dict{String,Int64}, taxonmap::Dict{String,String})
taxonnumber::Dict{String,Int}, taxonmap::Dict{String,String})
tree.numHybrids == 0 || error("input phylogenies must be trees")
# next: reset node & edge numbers so that they can be used as indices: 1,2,3,...
resetNodeNumbers!(tree; checkPreorder=true, type=:postorder) # leaves first & post-order
Expand Down
2 changes: 1 addition & 1 deletion src/snaq_optimization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1594,7 +1594,7 @@ function optTopRuns!(currT0::HybridNetwork, liktolAbs::Float64, Nfail::Integer,
end
end
tend = time_ns() # in nanoseconds
telapsed = round(convert(Int64, tend-tstart) * 1e-9, digits=2) # in seconds
telapsed = round(convert(Int, tend-tstart) * 1e-9, digits=2) # in seconds
writelog_1proc && close(errfile)
msg = "\n" * Dates.format(Dates.now(), "yyyy-mm-dd H:M:S.s")
if writelog
Expand Down
13 changes: 9 additions & 4 deletions src/substitutionModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ julia> PhyloNetworks.P(m1, 0.2)
@inline function P(obj::SM, t::Float64)
t >= 0.0 || error("substitution model: >=0 branch lengths are needed")
k = nstates(obj)
Pmat = MMatrix{k,k}(repeat([0.0], k*k))
Pmat = MMatrix{k,k,Float64}(undef)
return P!(Pmat, obj, t)
end

Expand Down Expand Up @@ -911,8 +911,11 @@ end
function P!(Pmat::AbstractMatrix, obj::JC69, t::Float64)
P0 = 0.25 + 0.75 * exp(-t * obj.eigeninfo[1]) #lambda
P1 = 0.25 - 0.25 * exp(-t * obj.eigeninfo[1])
Pmat[:,:] .= P1 # puts P1 on the off-diagonals (in particular)
for i in 1:4 Pmat[i,i]=P0; end # diagonal
# P1 off-diagonal and P0 on diagonal
Pmat[1,1] = P0; Pmat[2,1] = P1; Pmat[3,1] = P1; Pmat[4,1] = P1
Pmat[1,2] = P1; Pmat[2,2] = P0; Pmat[3,2] = P1; Pmat[4,2] = P1
Pmat[1,3] = P1; Pmat[2,3] = P1; Pmat[3,3] = P0; Pmat[4,3] = P1
Pmat[1,4] = P1; Pmat[2,4] = P1; Pmat[3,4] = P1; Pmat[4,4] = P0
return Pmat
end

Expand Down Expand Up @@ -1043,7 +1046,9 @@ end

function Base.show(io::IO, obj::RateVariationAcrossSites)
str = "Rate Variation Across Sites using Discretized Gamma Model\n"
str *= "alpha: $(round(obj.alpha, digits=5))\n"
if length(obj.ratemultiplier)>1
str *= "alpha: $(round(obj.alpha, digits=5))\n"
end
str *= "categories for Gamma discretization: $(obj.ncat)\n"
str *= "ratemultiplier: $(round.(obj.ratemultiplier, digits=5))\n"
print(io, str)
Expand Down
85 changes: 53 additions & 32 deletions src/traitsLikDiscrete.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ mutable struct StatisticalSubstitutionModel <: StatsBase.StatisticalModel
"log-likelihood of site k"
_sitecache::Array{Float64,1} # size: nsites
"log-likelihood of ith displayed tree t, given rate category j, of site k"
_loglikcache::Array{Float64, 3} # size: ntrees, nrates, nsites
_loglikcache::Array{Float64, 3} # size: nsites, nrates, ntrees

"inner (default) constructor: from model, rate model, network, trait and site weights"
function StatisticalSubstitutionModel(model::SubstitutionModel,
Expand Down Expand Up @@ -97,7 +97,7 @@ mutable struct StatisticalSubstitutionModel <: StatsBase.StatisticalModel
directlik = zeros(Float64, k, maxedges)
backwardlik= zeros(Float64, k, maxnodes)
_sitecache = Vector{Float64}(undef, nsites)
_loglikcache = zeros(Float64, ntrees, length(ratemodel.ratemultiplier), nsites)
_loglikcache = zeros(Float64, nsites, length(ratemodel.ratemultiplier), ntrees)
new(deepcopy(model), deepcopy(ratemodel), prioratroot,
net, trait, nsites, siteweight, totalsiteweight, missing, # missing log likelihood
logtrans, trees,
Expand Down Expand Up @@ -571,15 +571,34 @@ They are re-used for each displayed tree, which is why edges are not fused
around degree-2 nodes when extracting displayed trees.
"""
function update_logtrans(obj::SSM)
startingP = P(obj.model, 1.0) # same memory re-used for all branches below (t=1 arbitrary)
rates = obj.ratemodel.ratemultiplier
k = nstates(obj.model)
Ptmp = MMatrix{k,k,Float64}(undef) # memory to be re-used
for edge in obj.net.edge # update logtrans: same for all displayed trees, all traits
for i = 1:length(obj.ratemodel.ratemultiplier)
obj.logtrans[:,:,edge.number, i] = log.(P!(startingP, obj.model,
edge.length*obj.ratemodel.ratemultiplier[i])) # element-wise
enum = edge.number
len = edge.length
for i = 1:length(rates)
obj.logtrans[:,:,enum, i] .= log.(P!(Ptmp, obj.model, len * rates[i]))
end
end
end

"""
update_logtrans(obj::SSM, edge::Edge)
Update the log-transition probabilities associates to one particular `edge`
in the network.
"""
function update_logtrans(obj::SSM, edge::Edge)
rates = obj.ratemodel.ratemultiplier
enum = edge.number
len = edge.length
for i in 1:length(rates)
pmat = view(obj.logtrans, :,:,enum,i)
@inbounds pmat .= log.(P!(pmat, obj.model, len * rates[i]))
end
end

"""
discrete_corelikelihood!(obj::StatisticalSubstitutionModel;
whichtrait::AbstractVector{Int} = 1:obj.nsites)
Expand All @@ -600,19 +619,20 @@ function discrete_corelikelihood!(obj::SSM; whichtrait::AbstractVector{Int} = 1:
for t in 1:nt
for ri in 1:nr
for ci in whichtrait
obj._loglikcache[t,ri,ci] = discrete_corelikelihood_trait!(obj,t,ci,ri)
obj._loglikcache[ci,ri,t] = discrete_corelikelihood_trait!(obj,t,ci,ri)
# note: -Inf expected if 2 tips have different states, but separated by path of total length 0.0
end
end
end
# aggregate over trees and rates
obj._loglikcache[1:nt,:,:] .+= obj.priorltw
for ti in 1:nt
obj._loglikcache[:,:,ti] .+= obj.priorltw[ti]
end
# obj._loglikcache .-= log(nr)
# done below in 1 instead ntrees x nrates x nsites calculations, but _loglikcache not modified
# done below in 1 instead nsites x nrates x ntrees calculations, but _loglikcache not modified
for ci in whichtrait
obj._sitecache[ci] = logsumexp(view(obj._loglikcache, 1:nt,:,ci))
obj._sitecache[ci] = logsumexp(view(obj._loglikcache, ci,:,1:nt))
end
# siteliks = dropdims(mapslices(logsumexp, view(obj._loglikcache, 1:nt,:,:), dims=[1,2]); dims=(1,2))
if obj.siteweight !== nothing
obj._sitecache .*= obj.siteweight
end
Expand All @@ -623,21 +643,18 @@ function discrete_corelikelihood!(obj::SSM; whichtrait::AbstractVector{Int} = 1:
end

"""
discrete_corelikelihood_trait!(
obj::SSM, t::Integer, ci::Integer, ri::Integer,
forwardlik::AbstractArray{Float64, 2} = obj.forwardlik,
directlik::AbstractArray{Float64, 2} = obj.directlik)
discrete_corelikelihood_trait!(obj::SSM, t::Integer, ci::Integer, ri::Integer)
Return the likelihood for tree `t`, trait (character/site) index `ci` and rate category `ri`.
Update & modify the forward & directional log-likelihoods `forwardlik` and `directlik`
(in `obj` by default), which are indexed by [state, node_number or edge_number].
Update & modify the forward & directional log-likelihoods `obj.forwardlik`
and `obj.directlik`, which are indexed by [state, node_number or edge_number].
Used by [`discrete_corelikelihood!`](@ref).
**Preconditions**: `obj.logtrans` updated, edges directed, nodes/edges preordered
"""
function discrete_corelikelihood_trait!(obj::SSM, t::Integer, ci::Integer, ri::Integer,
forwardlik::AbstractArray{Float64, 2} = obj.forwardlik,
directlik::AbstractArray{Float64, 2} = obj.directlik)
function discrete_corelikelihood_trait!(obj::SSM, t::Integer, ci::Integer, ri::Integer)
forwardlik = obj.forwardlik
directlik = obj.directlik
tree = obj.displayedtree[t]
k = nstates(obj.model) # also = size(logtrans,1) if not RateVariationAcrossSites
fill!(forwardlik, 0.0) # re-initialize for each trait, each iteration
Expand Down Expand Up @@ -719,9 +736,12 @@ julia> round.(exp.(fit.priorltw), digits=4) # the prior tree probabilities are s
```
"""
function posterior_logtreeweight(obj::SSM, trait = 1)
# ts[tree,site] = log P(data and tree) at site
ts = dropdims(mapslices(logsumexp, view(obj._loglikcache, :,:,trait), dims=2);dims=2)
siteliks = mapslices(logsumexp, ts, dims=[1]) # 1 x ntraits array (or 1-element vector)
# ts[site,tree] = log P(data and tree) at site, integrated over rates
d = length(size(trait)) # 0 if single trait, 1 if vector of several traits
ts = dropdims(mapslices(logsumexp, view(obj._loglikcache, trait,:,:),
dims=d+1); dims=1)
if d>0 ts = permutedims(ts); end # now: ts[tree] or ts[tree,site]
siteliks = mapslices(logsumexp, ts, dims=1) # 1 x ntraits array (or 1-element vector)
# -log(nr) missing from both ts and siteliks, but cancels out next
ts .-= siteliks
return ts
Expand Down Expand Up @@ -924,7 +944,7 @@ function ancestralStateReconstruction(obj::SSM, trait::Integer = 1)
# update forward & directional likelihoods
discrete_corelikelihood_trait!(obj,t,trait,ri)
# update backward likelihoods
discrete_backwardlikelihood_trait!(obj,t,trait,ri)
discrete_backwardlikelihood_trait!(obj,t,ri)
# P{state i at node n} ∝ bkd[i,n] * frd[i,n] given tree & rate:
# res = logaddexp(res, ltw[t] + bkd + frd) --- -log(nr) will cancel out
broadcast!(logaddexp, res, res, ltprior .+ bkd + frd)
Expand All @@ -944,19 +964,20 @@ function ancestralStateReconstruction(obj::SSM, trait::Integer = 1)
end

"""
discrete_backwardlikelihood_trait!(obj::SSM, tree::Integer, trait::Integer,
ri::Integer=1, backwardlik=obj.backwardlik, directlik=obj.directlik)
discrete_backwardlikelihood_trait!(obj::SSM, tree::Integer, ri::Integer)
Update and return the backward likelihood (last argument `backwardlik`)
for trait index `trait`, assuming rate category `ri` and tree index `tree`.
assuming rate category `ri` and tree index `tree`,
using current forward and backwards likelihoods in `obj`:
these depend on the trait (or site) given to the last call to
`discrete_corelikelihood_trait!`.
Used by `ancestralStateReconstruction`.
**warning**: assume correct forward likelihood, directional likelihood
and transition probabilities.
**warning**: assume correct transition probabilities.
"""
function discrete_backwardlikelihood_trait!(obj::SSM, t::Integer, trait::Integer, ri::Integer,
backwardlik = obj.backwardlik,
directlik = obj.directlik)
function discrete_backwardlikelihood_trait!(obj::SSM, t::Integer, ri::Integer)
backwardlik = obj.backwardlik
directlik = obj.directlik
tree = obj.displayedtree[t]
k = nstates(obj.model)
fill!(backwardlik, 0.0) # re-initialize for each trait, each iteration
Expand Down

0 comments on commit ea7ba4a

Please sign in to comment.