bug fix to read list of tree files (#92)

- bug fix to read bootstrap trees - model_response -> response, to fix deprecation from StatsBase - ignore .toml in doc - travis: julia 1.1, drop 0.7 - new logo
JuliaPhylo · Mar 24, 2019 · 8cd2533 · 8cd2533
1 parent 5f3d860
commit 8cd2533
Show file tree

Hide file tree

Showing 14 changed files with 54 additions and 40 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,7 @@
 *.jl.mem
 docs/build/
 docs/site/
+docs/*.toml
 
 # Compiled source #
 ###################

diff --git a/.travis.yml b/.travis.yml
@@ -5,8 +5,7 @@ os:
   - linux
   - osx
 julia:
-  - 0.7
-  - 1.0
+  - 1.1
 
 notifications:
   email: false

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# PhyloNetworks: analysis for phylogenetic networks
+# PhyloNetworks: analysis for phylogenetic networks <img src="docs/src/logo_text.png" align=right>
 
 [![Build Status](https://travis-ci.org/crsl4/PhyloNetworks.jl.svg)](https://travis-ci.org/crsl4/PhyloNetworks.jl)
 [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://crsl4.github.io/PhyloNetworks.jl/stable)
@@ -42,7 +42,7 @@ If you use the package, please cite
   34(12):3292–3298.
   [doi:10.1093/molbev/msx235](https://doi.org/10.1093/molbev/msx235)
 
-## Maximum pseudolikelihood estimation of species network: SNaQ <img src="http://pages.stat.wisc.edu/~claudia/Images/snaq.png" align=right title="SNaQ logo" width=262.5 height=111>
+## Maximum pseudolikelihood estimation of species network: SNaQ <img src="docs/src/snaq.png" align=right title="SNaQ logo" width=262.5 height=111>
 <!-- ![SNaQ logo](http://pages.stat.wisc.edu/~claudia/Images/snaq.png)
 original size: 525px × 222px-->
 

diff --git a/docs/readme.md b/docs/readme.md
@@ -92,6 +92,8 @@ or interactively in `docs/`:
 
 ```shell
 pkg> activate .
+pkg> status # just to check
+pkg> status --manifest
 pkg> instantiate
 pkg> # dev PhyloPlots # to get the master branch
 pkg> dev ~/.julia/dev/PhyloNetworks

diff --git a/docs/src/assets/logo.png b/docs/src/assets/logo.png
diff --git a/docs/src/lib/internals.md b/docs/src/lib/internals.md
@@ -32,7 +32,7 @@ Order   = [:type]
 ```@autodocs
 Modules = [PhyloNetworks]
 Public = false
-Order   = [:function]
+Order   = [:function, :constant]
 ```
 
 ```@meta

diff --git a/docs/src/logo_text.png b/docs/src/logo_text.png
diff --git a/docs/src/snaq.png b/docs/src/snaq.png
diff --git a/examples/treefilelist.txt b/examples/treefilelist.txt
@@ -0,0 +1,2 @@
+treefile.txt
+treefile.txt
diff --git a/src/bootstrap.jl b/src/bootstrap.jl
@@ -3,23 +3,30 @@
 # Cecile April 2016
 
 """
-    readBootstrapTrees(filename)
+    readBootstrapTrees(listfile; relative2listfile=true)
 
-input: name of file containing the path/name to multiple bootstrap files, one per line.
-Each bootstrap file corresponds to bootstrap trees from a single gene.
+Read the list of file names in `listfile`, then read all the trees in each of
+these files. Output: vector of vectors of trees (networks with h>0 allowed).
 
-output: vector of vectors of trees (networks with h>0 allowed).
+`listfile` should be the name of a file containing the path/name to multiple
+bootstrap files, one on each line (no header). Each named bootstrap file should
+contain multiple trees, one per line (such as bootstrap trees from a single gene).
+
+The path/name to each bootstrap file should be relative to `listfile`.
+Otherwise, use option `relative2listfile=false`, in which case the file names
+are interpreted as usual: relative to the user's current directory
+if not given as absolute paths.
 """
-function readBootstrapTrees(filelist::AbstractString)
-    s = open(filelist) # IOStream
-    bootfile = readdlm(s,AbstractString)
-    close(s)
-    size(bootfile)[2] == 1 ||
-        error("there should be a single bootstrap file name on each row of file $filelist")
-    ngenes = size(bootfile)[1]
+function readBootstrapTrees(filelist::AbstractString; relative2listfile=true::Bool)
+    filelistdir = dirname(filelist)
+    bootfiles = CSV.read(filelist, header=false, types=Dict(1=>String))
+    size(bootfiles)[2] > 0 ||
+        error("there should be a column in file $filelist: with a single bootstrap file name on each row (no header)")
+    ngenes = size(bootfiles)[1]
+    bf = (relative2listfile ? joinpath.(filelistdir, bootfiles[1]) : bootfiles[1])
     treelists = Array{Vector{HybridNetwork}}(undef, ngenes)
     for igene in 1:ngenes
-        treelists[igene] = readMultiTopology(bootfile[igene])
+        treelists[igene] = readMultiTopology(bf[igene])
         print("read $igene/$ngenes bootstrap tree files\r") # using \r for better progress display
     end
     return treelists

diff --git a/src/traits.jl b/src/traits.jl
@@ -1118,7 +1118,7 @@ Dominated by the `LinPredModel` class, from package `GLM`.
 
 The following StatsBase functions can be applied to it:
 `coef`, `nobs`, `vcov`, `stderror`, `confint`, `coeftable`, `dof_residual`, `dof`, `deviance`,
-`residuals`, `model_response`, `predict`, `loglikelihood`, `nulldeviance`, `nullloglikelihood`,
+`residuals`, `response`, `predict`, `loglikelihood`, `nulldeviance`, `nullloglikelihood`,
 `r2`, `adjr2`, `aic`, `aicc`, `bic`.
 
 The following StatsModels functions can also be applied to it:
@@ -1575,7 +1575,7 @@ julia> round.(residuals(fitBM), digits=6)
  -0.211712
  -0.439833
 
-julia> round.(model_response(fitBM), digits=5)
+julia> round.(response(fitBM), digits=5)
 197-element Array{Float64,1}:
  4.44135
  4.32106
@@ -1677,7 +1677,7 @@ function phyloNetworklm(f::Formula,
     else
         mm = ModelMatrix(mf)
     end
-    Y = convert(Vector{Float64},DataFrames.model_response(mf))
+    Y = convert(Vector{Float64}, StatsModels.model_response(mf))
     # Fit the model (Method copied from DataFrame/src/statsmodels/statsmodels.jl, lines 47-58)
     # (then StatsModels/src/statsmodels.jl lines 42-46)
     StatsModels.DataFrameRegressionModel(phyloNetworklm(mm.m, Y, net;
@@ -1728,7 +1728,7 @@ StatsBase.deviance(m::PhyloNetworkLinearModel) = deviance(m.lm)
 # (Rescaled by cholesky of variance between tips)
 StatsBase.residuals(m::PhyloNetworkLinearModel) = m.RL * residuals(m.lm)
 # Tip data
-StatsBase.model_response(m::PhyloNetworkLinearModel) = m.Y
+StatsBase.response(m::PhyloNetworkLinearModel) = m.Y
 # Predicted values at the tips
 # (rescaled by cholesky of tips variances)
 StatsBase.predict(m::PhyloNetworkLinearModel) = m.RL * predict(m.lm)
@@ -1741,8 +1741,8 @@ StatsBase.loglikelihood(m::PhyloNetworkLinearModel) =  loglikelihood(m.lm) - 1/2
 function StatsBase.nulldeviance(m::PhyloNetworkLinearModel)
     vo = ones(length(m.Y), 1)
     vo = m.RL \ vo
-    bo = inv(vo'*vo)*vo'*model_response(m.lm)
-    ro = model_response(m.lm) - vo*bo
+    bo = inv(vo'*vo)*vo'*response(m.lm)
+    ro = response(m.lm) - vo*bo
     return sum(ro.^2)
 end
 # Null Log likelihood (null model with only the intercept)
@@ -1815,6 +1815,7 @@ lambda_estim(m::StatsModels.DataFrameRegressionModel{PhyloNetworkLinearModel,T}
 StatsModels.ModelFrame(m::StatsModels.DataFrameRegressionModel) = m.mf
 StatsModels.ModelMatrix(m::StatsModels.DataFrameRegressionModel) = m.mm
 StatsModels.Formula(m::StatsModels.DataFrameRegressionModel) = Formula(m.mf.terms)
+StatsModels.response(m::StatsModels.DataFrameRegressionModel) = response(m.model)
 
 ### Print the results
 # Variance
@@ -2208,7 +2209,7 @@ julia> ancStates = ancestralStateReconstruction(fitBM) # Should produce a warnin
 │ assuming that the estimated variance rate of evolution is correct.
 │ Additional uncertainty in the estimation of this variance rate is
 │ ignored, so prediction intervals should be larger.
-└ @ PhyloNetworks ~/build/crsl4/PhyloNetworks.jl/src/traits.jl:2162
+└ @ PhyloNetworks ~/build/crsl4/PhyloNetworks.jl/src/traits.jl:2163
 ReconstructedStates:
      Node index     Pred.       Min. Max. (95%)
            -5.0   1.32139  -0.288423     2.9312
@@ -2352,7 +2353,7 @@ julia> ancStates = ancestralStateReconstruction(fitBM);
 │ assuming that the estimated variance rate of evolution is correct.
 │ Additional uncertainty in the estimation of this variance rate is
 │ ignored, so prediction intervals should be larger.
-└ @ PhyloNetworks ~/build/crsl4/PhyloNetworks.jl/src/traits.jl:2162
+└ @ PhyloNetworks ~/build/crsl4/PhyloNetworks.jl/src/traits.jl:2163
 
 julia> expectations(ancStates)
 31×2 DataFrames.DataFrame

diff --git a/test/test_bootstrap.jl b/test/test_bootstrap.jl
@@ -77,4 +77,9 @@ rmprocs(workers())
 @test writeTopology(bootnet[2], round=true, digits=1) == "(5,(((2,(1)#H7:::0.8):0.5,3):0.2,4):0.2,(6,#H7:::0.2):10.0);"
 # "(5,(((2,(1)#H7:::0.751):1.559,4):0.373,3):0.688,(6,#H7:::0.249):10.0);"
 # "(((5,(6,#H7:::0.249):10.0):0.688,3):0.373,(2,(1)#H7:::0.751):1.559,4);"
+
+filelist = joinpath(exdir, "treefilelist.txt")
+boottrees = readBootstrapTrees(filelist)
+@test length(boottrees) == 2
+@test [length(b) for b in boottrees] == [10, 10]
 end
diff --git a/test/test_lm.jl b/test/test_lm.jl
@@ -46,7 +46,7 @@ nullloglik = - 1 / 2 * (ntaxa + ntaxa * log(2 * pi) + ntaxa * log(nullsigma2hat)
 @test coef(phynetlm) ≈ betahat
 @test nobs(phynetlm) ≈ ntaxa
 @test residuals(phynetlm) ≈ resids
-@test model_response(phynetlm) ≈ Y
+@test response(phynetlm) ≈ Y
 @test predict(phynetlm) ≈ fittedValues
 @test dof_residual(phynetlm) ≈ ntaxa-length(betahat)
 @test sigma2_estim(phynetlm) ≈ sigma2hat
@@ -74,7 +74,7 @@ fitbis = phyloNetworklm(@formula(trait ~ 1), dfr, net)
 @test vcov(phynetlm) ≈ vcov(fitbis)
 @test nobs(phynetlm) ≈ nobs(fitbis)
 @test residuals(phynetlm)[fitbis.model.ind] ≈ residuals(fitbis)
-@test model_response(phynetlm)[fitbis.model.ind] ≈ model_response(fitbis)
+@test response(phynetlm)[fitbis.model.ind] ≈ response(fitbis)
 @test predict(phynetlm)[fitbis.model.ind] ≈ predict(fitbis)
 @test dof_residual(phynetlm) ≈ dof_residual(fitbis)
 @test sigma2_estim(phynetlm) ≈ sigma2_estim(fitbis)
@@ -102,7 +102,7 @@ fitlam = phyloNetworklm(@formula(trait ~ 1), dfr, net, model = "lambda", fixedVa
 @test vcov(fitlam) ≈ vcov(fitbis)
 @test nobs(fitlam) ≈ nobs(fitbis)
 @test residuals(fitlam)[fitbis.model.ind] ≈ residuals(fitbis)
-@test model_response(fitlam)[fitbis.model.ind] ≈ model_response(fitbis)
+@test response(fitlam)[fitbis.model.ind] ≈ response(fitbis)
 @test predict(fitlam)[fitbis.model.ind] ≈ predict(fitbis)
 @test dof_residual(fitlam) ≈ dof_residual(fitbis)
 @test sigma2_estim(fitlam) ≈ sigma2_estim(fitbis)
@@ -173,7 +173,7 @@ fitlam = phyloNetworklm(@formula(trait ~ shift_8 + shift_17), dfr, net, model =
 @test vcov(fitlam) ≈ vcov(fitShift)
 @test nobs(fitlam) ≈ nobs(fitShift)
 @test residuals(fitlam) ≈ residuals(fitShift)
-@test model_response(fitlam) ≈ model_response(fitShift)
+@test response(fitlam) ≈ response(fitShift)
 @test predict(fitlam) ≈ predict(fitShift)
 @test dof_residual(fitlam) ≈ dof_residual(fitShift)
 @test sigma2_estim(fitlam) ≈ sigma2_estim(fitShift)
@@ -271,7 +271,7 @@ nullloglik = - 1 / 2 * (ntaxa + ntaxa * log(2 * pi) + ntaxa * log(nullsigma2hat)
 @test coef(fit_mat) ≈ betahat
 @test nobs(fit_mat) ≈ ntaxa
 @test residuals(fit_mat) ≈ resids
-@test model_response(fit_mat) ≈ B
+@test response(fit_mat) ≈ B
 @test predict(fit_mat) ≈ fittedValues
 @test dof_residual(fit_mat) ≈ ntaxa-length(betahat)
 @test sigma2_estim(fit_mat) ≈ sigma2hat
@@ -297,7 +297,7 @@ phynetlm = phyloNetworklm(@formula(trait ~ pred), dfr, net)
 @test vcov(phynetlm) ≈ vcov(fit_mat)
 @test nobs(phynetlm) ≈ nobs(fit_mat)
 @test residuals(phynetlm) ≈ residuals(fit_mat)
-@test model_response(phynetlm) ≈ model_response(fit_mat)
+@test response(phynetlm) ≈ response(fit_mat)
 @test predict(phynetlm) ≈ predict(fit_mat)
 @test dof_residual(phynetlm) ≈ dof_residual(fit_mat)
 @test sigma2_estim(phynetlm) ≈ sigma2_estim(fit_mat)
@@ -323,7 +323,7 @@ fitbis = phyloNetworklm(@formula(trait ~ pred), dfr, net)
 @test vcov(phynetlm) ≈ vcov(fitbis)
 @test nobs(phynetlm) ≈ nobs(fitbis)
 @test residuals(phynetlm)[fitbis.model.ind] ≈ residuals(fitbis)
-@test model_response(phynetlm)[fitbis.model.ind] ≈ model_response(fitbis)
+@test response(phynetlm)[fitbis.model.ind] ≈ response(fitbis)
 @test predict(phynetlm)[fitbis.model.ind] ≈ predict(fitbis)
 @test dof_residual(phynetlm) ≈ dof_residual(fitbis)
 @test sigma2_estim(phynetlm) ≈ sigma2_estim(fitbis)
@@ -349,7 +349,7 @@ fitter = (@test_logs (:info, r"^As requested \(no_names=true\)")  match_mode=:an
 @test vcov(phynetlm) ≈ vcov(fitter)
 @test nobs(phynetlm) ≈ nobs(fitter)
 @test residuals(phynetlm) ≈ residuals(fitter)
-@test model_response(phynetlm) ≈ model_response(fitter)
+@test response(phynetlm) ≈ response(fitter)
 @test predict(phynetlm) ≈ predict(fitter)
 @test dof_residual(phynetlm) ≈ dof_residual(fitter)
 @test sigma2_estim(phynetlm) ≈ sigma2_estim(fitter)
@@ -384,7 +384,7 @@ fitnabis = phyloNetworklm(@formula(trait ~ pred), dfr, net)
 @test vcov(fitna) ≈ vcov(fitnabis)
 @test nobs(fitna) ≈ nobs(fitnabis)
 @test sort(residuals(fitna)) ≈ sort(residuals(fitnabis))
-@test sort(model_response(fitna)) ≈ sort(model_response(fitnabis))
+@test sort(response(fitna)) ≈ sort(response(fitnabis))
 @test sort(predict(fitna)) ≈ sort(predict(fitnabis))
 @test dof_residual(fitna) ≈ dof_residual(fitnabis)
 @test sigma2_estim(fitna) ≈ sigma2_estim(fitnabis)
@@ -410,7 +410,7 @@ fitlam = phyloNetworklm(@formula(trait ~ pred), dfr, net, model = "lambda", fixe
 @test vcov(fitlam) ≈ vcov(fitnabis)
 @test nobs(fitlam) ≈ nobs(fitnabis)
 @test residuals(fitlam) ≈ residuals(fitnabis)
-@test model_response(fitlam) ≈ model_response(fitnabis)
+@test response(fitlam) ≈ response(fitnabis)
 @test predict(fitlam) ≈ predict(fitnabis)
 @test dof_residual(fitlam) ≈ dof_residual(fitnabis)
 @test sigma2_estim(fitlam) ≈ sigma2_estim(fitnabis)
@@ -581,7 +581,7 @@ nullloglik = - 1 / 2 * (ntaxa + ntaxa * log(2 * pi) + ntaxa * log(nullsigma2hat)
 
 @test nobs(phynetlm) ≈ ntaxa
 @test residuals(phynetlm) ≈ resids
-@test model_response(phynetlm) ≈ Y
+@test response(phynetlm) ≈ Y
 @test predict(phynetlm) ≈ fittedValues
 @test dof_residual(phynetlm) ≈ ntaxa
 @test sigma2_estim(phynetlm) ≈ sigma2hat
@@ -603,7 +603,7 @@ fitbis = phyloNetworklm(@formula(trait ~ -1), dfr, net)
 #@test vcov(phynetlm) ≈ vcov(fitbis)
 @test nobs(phynetlm) ≈ nobs(fitbis)
 @test residuals(phynetlm)[fitbis.model.ind] ≈ residuals(fitbis)
-@test model_response(phynetlm)[fitbis.model.ind] ≈ model_response(fitbis)
+@test response(phynetlm)[fitbis.model.ind] ≈ response(fitbis)
 @test predict(phynetlm)[fitbis.model.ind] ≈ predict(fitbis)
 @test dof_residual(phynetlm) ≈ dof_residual(fitbis)
 @test sigma2_estim(phynetlm) ≈ sigma2_estim(fitbis)

diff --git a/test/test_lm_tree.jl b/test/test_lm_tree.jl
@@ -70,7 +70,6 @@ tmp = predict(fitBM);
 #println("are they all 4.6789989001?")
 # next: looks random. sometimes passes, most times fails
 @test predict(fitBM) ≈ [4.6789989001 for i in 1:197] atol=1e-8
-# @test_approx_eq_eps model_response(fitBM)[fitbis.model.ind] model_response(fitbis)
 # @test_approx_eq_eps deviance(fitBM)  deviance(fitbis)
 # @test_approx_eq_eps nulldeviance(fitBM)  nulldeviance(fitbis)
 # @test_approx_eq_eps nullloglikelihood(fitBM)  nullloglikelihood(fitbis)
@@ -400,7 +399,6 @@ vcovR =  [0.0200086273  -0.0136717540 0.0084815090  -0.0093192029 -0.0114417825
 @test stderror(fitBM) ≈ [0.1414518551,0.1361605540,0.1321542330,0.1295968341,0.2214683008,0.1820427154,0.0672106202,0.0965879311,0.0864973651] atol=1e-10
 @test confint(fitBM)[:,1] ≈ [2.5115945339,-0.4715366529,0.7207474097,-0.3595508202,-0.8102854443,-0.2058583178,-0.0960507369,-0.0112932922,-0.2213931131] atol=1e-10 norm=x->LinearAlgebra.norm(x,Inf)
 @test confint(fitBM)[:,2] ≈ [3.0735480006,0.0693957746,1.2457637082,0.1553055609,0.0695537019,0.5173526640,0.1709605441,0.3724268272,0.1222396666] atol=1e-10
-# @test_approx_eq_eps model_response(fitBM)[fitbis.model.ind] model_response(fitbis)
 # @test_approx_eq_eps predict(fitBM)[fitbis.model.ind] predict(fitbis)
 # @test_approx_eq_eps deviance(fitBM)  deviance(fitbis)
 # @test_approx_eq_eps nulldeviance(fitBM)  nulldeviance(fitbis)
@@ -485,7 +483,6 @@ vcovR =  [0.0200251600  -0.0137474015 0.0085637021  -0.0092973836 -0.0114259722
 @test stderror(fitLambda) ≈ [0.1415102824,0.1367059706,0.1327404019,0.1294070617,0.2213803048,0.1817274626,0.0671133793,0.0966096332,0.0863718011] atol=1e-6
 @test confint(fitLambda)[:,1] ≈ [2.5129645499,-0.4782080775,0.7260358930,-0.3575353260,-0.8075438955,-0.2033049779,-0.0965491169,-0.0126529301,-0.2220960868] atol=1e-5
 @test confint(fitLambda)[:,2] ≈ [3.0751501341,0.0648911562,1.2533808968,0.1565671360,0.0719456641,0.5186535822,0.1700758500,0.3711534067,0.1210378584] atol=1e-5
-# @test_approx_eq_eps model_response(fitLambda)[fitbis.model.ind] model_response(fitbis)
 # @test_approx_eq_eps predict(fitLambda)[fitbis.model.ind] predict(fitbis)
 # @test_approx_eq_eps deviance(fitLambda)  deviance(fitbis)
 # @test_approx_eq_eps nulldeviance(fitLambda)  nulldeviance(fitbis)